diff options
659 files changed, 25760 insertions, 5586 deletions
@@ -37,6 +37,7 @@ Andrew Murray <amurray@thegoodpenguin.co.uk> <amurray@embedded-bits.co.uk> Andrew Murray <amurray@thegoodpenguin.co.uk> <andrew.murray@arm.com> Andrew Vasquez <andrew.vasquez@qlogic.com> Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com> +Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com> Andy Adamson <andros@citi.umich.edu> Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com> Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com> @@ -179,6 +180,8 @@ Kees Cook <keescook@chromium.org> <kees.cook@canonical.com> Kees Cook <keescook@chromium.org> <keescook@google.com> Kees Cook <keescook@chromium.org> <kees@outflux.net> Kees Cook <keescook@chromium.org> <kees@ubuntu.com> +Keith Busch <kbusch@kernel.org> <keith.busch@intel.com> +Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com> Kenneth W Chen <kenneth.w.chen@intel.com> Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru> Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com> @@ -199,6 +202,8 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com> Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com> Maciej W. 
Rozycki <macro@mips.com> <macro@imgtec.com> +Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com> +Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org> Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com> Mark Brown <broonie@sirena.org.uk> @@ -244,6 +249,7 @@ Morten Welinder <welinder@anemone.rentec.com> Morten Welinder <welinder@darter.rentec.com> Morten Welinder <welinder@troll.com> Mythri P K <mythripk@ti.com> +Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com> Nguyen Anh Quynh <aquynh@gmail.com> Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com> Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org> @@ -334,6 +340,8 @@ Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org> Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com> Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com> Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com> +Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org> +Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com> Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com> Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com> diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index 1f2002df5ba2..1419103d11f9 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -337,3 +337,18 @@ Contact: netdev@vger.kernel.org Description: 32-bit unsigned integer counting the number of times the link has been down + +What: /sys/class/net/<iface>/threaded +Date: Jan 2021 +KernelVersion: 5.12 +Contact: netdev@vger.kernel.org +Description: + Boolean value to control the threaded mode per device. 
User could + set this value to enable/disable threaded mode for all napi + belonging to this device, without the need to do device up/down. + + Possible values: + == ================================== + 0 threaded mode disabled for this dev + 1 threaded mode enabled for this dev + == ================================== diff --git a/Documentation/Makefile b/Documentation/Makefile index 61a7310b49e0..9c42dde97671 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -75,7 +75,7 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/userspace-api/media $2 && \ PYTHONDONTWRITEBYTECODE=1 \ BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \ - $(PYTHON) $(srctree)/scripts/jobserver-exec \ + $(PYTHON3) $(srctree)/scripts/jobserver-exec \ $(SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \ $(SPHINXBUILD) \ -b $2 \ diff --git a/Documentation/admin-guide/syscall-user-dispatch.rst b/Documentation/admin-guide/syscall-user-dispatch.rst index a380d6515774..60314953c728 100644 --- a/Documentation/admin-guide/syscall-user-dispatch.rst +++ b/Documentation/admin-guide/syscall-user-dispatch.rst @@ -70,8 +70,8 @@ trampoline code on the vDSO, that trampoline is never intercepted. [selector] is a pointer to a char-sized region in the process memory region, that provides a quick way to enable/disable syscall redirection thread-wide, without the need to invoke the kernel directly. selector -can be set to PR_SYS_DISPATCH_ON or PR_SYS_DISPATCH_OFF. Any other -value should terminate the program with a SIGSYS. +can be set to SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK. +Any other value should terminate the program with a SIGSYS.
Security Notes -------------- diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 1651d961f06a..a248ac3941be 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -163,8 +163,7 @@ particular KASAN features. - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack - traces collection (default: ``on`` for ``CONFIG_DEBUG_KERNEL=y``, otherwise - ``off``). + traces collection (default: ``on``). - ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN report or also panic the kernel (default: ``report``). diff --git a/Documentation/devicetree/bindings/input/adc-keys.txt b/Documentation/devicetree/bindings/input/adc-keys.txt index e551814629b4..6c8be6a9ace2 100644 --- a/Documentation/devicetree/bindings/input/adc-keys.txt +++ b/Documentation/devicetree/bindings/input/adc-keys.txt @@ -5,7 +5,8 @@ Required properties: - compatible: "adc-keys" - io-channels: Phandle to an ADC channel - io-channel-names = "buttons"; - - keyup-threshold-microvolt: Voltage at which all the keys are considered up. + - keyup-threshold-microvolt: Voltage above or equal to which all the keys are + considered up. Optional properties: - poll-interval: Poll interval time in milliseconds @@ -17,7 +18,12 @@ Each button (key) is represented as a sub-node of "adc-keys": Required subnode-properties: - label: Descriptive name of the key. - linux,code: Keycode to emit. - - press-threshold-microvolt: Voltage ADC input when this key is pressed. + - press-threshold-microvolt: voltage above or equal to which this key is + considered pressed. + +No two values of press-threshold-microvolt may be the same. +All values of press-threshold-microvolt must be less than +keyup-threshold-microvolt. 
Example: @@ -47,3 +53,15 @@ Example: press-threshold-microvolt = <500000>; }; }; + ++--------------------------------+------------------------+ +| 2.000.000 <= value | no key pressed | ++--------------------------------+------------------------+ +| 1.500.000 <= value < 2.000.000 | KEY_VOLUMEUP pressed | ++--------------------------------+------------------------+ +| 1.000.000 <= value < 1.500.000 | KEY_VOLUMEDOWN pressed | ++--------------------------------+------------------------+ +| 500.000 <= value < 1.000.000 | KEY_ENTER pressed | ++--------------------------------+------------------------+ +| value < 500.000 | no key pressed | ++--------------------------------+------------------------+ diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml index da5b0d87e16d..93f2ce3130ae 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml @@ -26,6 +26,7 @@ properties: - goodix,gt927 - goodix,gt9271 - goodix,gt928 + - goodix,gt9286 - goodix,gt967 reg: diff --git a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml new file mode 100644 index 000000000000..79c38ea14237 --- /dev/null +++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/brcm,bcm4908-enet.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom BCM4908 Ethernet controller + +description: Broadcom's Ethernet controller integrated into BCM4908 family SoCs + +maintainers: + - Rafał Miłecki <rafal@milecki.pl> + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + const: brcm,bcm4908-enet + + reg: + maxItems: 1 + + interrupts: + description: RX interrupt + + interrupt-names: +
const: rx + +required: + - reg + - interrupts + - interrupt-names + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + ethernet@80002000 { + compatible = "brcm,bcm4908-enet"; + reg = <0x80002000 0x1000>; + + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx"; + }; diff --git a/Documentation/devicetree/bindings/net/btusb.txt b/Documentation/devicetree/bindings/net/btusb.txt index b1ad6ee68e90..c51dd99dc0d3 100644 --- a/Documentation/devicetree/bindings/net/btusb.txt +++ b/Documentation/devicetree/bindings/net/btusb.txt @@ -38,7 +38,7 @@ Following example uses irq pin number 3 of gpio0 for out of band wake-on-bt: compatible = "usb1286,204e"; reg = <1>; interrupt-parent = <&gpio0>; - interrupt-name = "wakeup"; + interrupt-names = "wakeup"; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; }; }; diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt index b78397669320..ce15c173f43f 100644 --- a/Documentation/devicetree/bindings/net/marvell-pp2.txt +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -1,5 +1,6 @@ * Marvell Armada 375 Ethernet Controller (PPv2.1) Marvell Armada 7K/8K Ethernet Controller (PPv2.2) + Marvell CN913X Ethernet Controller (PPv2.3) Required properties: @@ -12,10 +13,11 @@ Required properties: - common controller registers - LMS registers - one register area per Ethernet port - For "marvell,armada-7k-pp2", must contain the following register + For "marvell,armada-7k-pp2" used by 7K/8K and CN913X, must contain the following register sets: - packet processor registers - networking interfaces registers + - CM3 address space used for TX Flow Control - clocks: pointers to the reference clocks for this device, consequently: - main controller clock (for both armada-375-pp2 and armada-7k-pp2) @@ -81,7 +83,7 @@ Example for marvell,armada-7k-pp2: cpm_ethernet: 
ethernet@0 { compatible = "marvell,armada-7k-pp22"; - reg = <0x0 0x100000>, <0x129000 0xb000>; + reg = <0x0 0x100000>, <0x129000 0xb000>, <0x220000 0x800>; clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, <&cpm_syscon0 1 5>, <&cpm_syscon0 1 6>, <&cpm_syscon0 1 18>; clock-names = "pp_clk", "gop_clk", "mg_clk", "mg_core_clk", "axi_clk"; diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 587a93973929..78240e29b0bb 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -586,6 +586,14 @@ without significant effort. The advantage of mounting with the "volatile" option is that all forms of sync calls to the upper filesystem are omitted. +In order to avoid giving a false sense of safety, the syncfs (and fsync) +semantics of volatile mounts are slightly different than that of the rest of +VFS. If any writeback error occurs on the upperdir's filesystem after a +volatile mount takes place, all sync functions will return an error. Once this +condition is reached, the filesystem will not recover, and every subsequent sync +call will return an error, even if the upperdir has not experienced a new error +since the last sync call. + When overlay is mounted with "volatile" option, the directory "$workdir/work/incompat/volatile" is created. During next mount, overlay checks for this directory and refuses to mount if present. This is a strong diff --git a/Documentation/kbuild/gcc-plugins.rst b/Documentation/kbuild/gcc-plugins.rst index 4b1c10f88e30..3349966f213d 100644 --- a/Documentation/kbuild/gcc-plugins.rst +++ b/Documentation/kbuild/gcc-plugins.rst @@ -11,16 +11,13 @@ compiler [1]_. They are useful for runtime instrumentation and static analysis. We can analyse, change and add further code during compilation via callbacks [2]_, GIMPLE [3]_, IPA [4]_ and RTL passes [5]_.
-The GCC plugin infrastructure of the kernel supports all gcc versions from -4.5 to 6.0, building out-of-tree modules, cross-compilation and building in a -separate directory. -Plugin source files have to be compilable by both a C and a C++ compiler as well -because gcc versions 4.5 and 4.6 are compiled by a C compiler, -gcc-4.7 can be compiled by a C or a C++ compiler, -and versions 4.8+ can only be compiled by a C++ compiler. +The GCC plugin infrastructure of the kernel supports building out-of-tree +modules, cross-compilation and building in a separate directory. +Plugin source files have to be compilable by a C++ compiler. -Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and -powerpc architectures. +Currently the GCC plugin infrastructure supports only some architectures. +Grep "select HAVE_GCC_PLUGINS" to find out which architectures support +GCC plugins. This infrastructure was ported from grsecurity [6]_ and PaX [7]_. @@ -47,20 +44,13 @@ Files This is a compatibility header for GCC plugins. It should be always included instead of individual gcc headers. -**$(src)/scripts/gcc-plugin.sh** - - This script checks the availability of the included headers in - gcc-common.h and chooses the proper host compiler to build the plugins - (gcc-4.7 can be built by either gcc or g++). - **$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h** These headers automatically generate the registration structures for - GIMPLE, SIMPLE_IPA, IPA and RTL passes. They support all gcc versions - from 4.5 to 6.0. + GIMPLE, SIMPLE_IPA, IPA and RTL passes. They should be preferred to creating the structures by hand. 
@@ -68,21 +58,25 @@ Usage ===== You must install the gcc plugin headers for your gcc version, -e.g., on Ubuntu for gcc-4.9:: +e.g., on Ubuntu for gcc-10:: - apt-get install gcc-4.9-plugin-dev + apt-get install gcc-10-plugin-dev Or on Fedora:: dnf install gcc-plugin-devel -Enable a GCC plugin based feature in the kernel config:: +Enable the GCC plugin infrastructure and some plugin(s) you want to use +in the kernel config:: - CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y + CONFIG_GCC_PLUGINS=y + CONFIG_GCC_PLUGIN_CYC_COMPLEXITY=y + CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y + ... -To compile only the plugin(s):: +To compile the minimum tool set including the plugin(s):: - make gcc-plugins + make scripts or just run the kernel make and compile the whole kernel with the cyclomatic complexity GCC plugin. @@ -91,7 +85,8 @@ the cyclomatic complexity GCC plugin. 4. How to add a new GCC plugin ============================== -The GCC plugins are in $(src)/scripts/gcc-plugins/. You can use a file or a directory -here. It must be added to $(src)/scripts/gcc-plugins/Makefile, -$(src)/scripts/Makefile.gcc-plugins and $(src)/arch/Kconfig. +The GCC plugins are in scripts/gcc-plugins/. You need to put plugin source files +right under scripts/gcc-plugins/. Creating subdirectories is not supported. +It must be added to scripts/gcc-plugins/Makefile, scripts/Makefile.gcc-plugins +and a relevant Kconfig file. See the cyc_complexity_plugin.c (CONFIG_GCC_PLUGIN_CYC_COMPLEXITY) GCC plugin. diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index 21c847890d03..b18401d2ba82 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -63,6 +63,50 @@ They can be enabled individually. The full list of the parameters: :: Currently, the integrated assembler is disabled by default. You can pass ``LLVM_IAS=1`` to enable it. 
+Supported Architectures +----------------------- + +LLVM does not target all of the architectures that Linux supports and +just because a target is supported in LLVM does not mean that the kernel +will build or work without any issues. Below is a general summary of +architectures that currently work with ``CC=clang`` or ``LLVM=1``. Level +of support corresponds to "S" values in the MAINTAINERS files. If an +architecture is not present, it either means that LLVM does not target +it or there are known issues. Using the latest stable version of LLVM or +even the development tree will generally yield the best results. +An architecture's ``defconfig`` is generally expected to work well, but +certain configurations may have problems that have not been uncovered +yet. Bug reports are always welcome at the issue tracker below! + +.. list-table:: + :widths: 10 10 10 + :header-rows: 1 + + * - Architecture + - Level of support + - ``make`` command + * - arm + - Supported + - ``LLVM=1`` + * - arm64 + - Supported + - ``LLVM=1`` + * - mips + - Maintained + - ``CC=clang`` + * - powerpc + - Maintained + - ``CC=clang`` + * - riscv + - Maintained + - ``CC=clang`` + * - s390 + - Maintained + - ``CC=clang`` + * - x86 + - Supported + - ``LLVM=1`` + Getting Help ------------ diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 9f6a11881951..300d8edcb994 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -755,7 +755,7 @@ more details, with real examples. bits on the scripts nonetheless. Kbuild provides variables $(CONFIG_SHELL), $(AWK), $(PERL), - $(PYTHON) and $(PYTHON3) to refer to interpreters for the respective + and $(PYTHON3) to refer to interpreters for the respective scripts.
Example:: diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index cbb75a1818c0..6b5dc203da2b 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -49,6 +49,7 @@ Contents: stmicro/stmmac ti/cpsw ti/cpsw_switchdev + ti/am65_nuss_cpsw_switchdev ti/tlan toshiba/spider_net diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index ee43ea57d443..e7d9cbff771b 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -1,46 +1,1031 @@ .. SPDX-License-Identifier: GPL-2.0+ -================================================================== -Linux Base Driver for the Intel(R) Ethernet Connection E800 Series -================================================================== +================================================================= +Linux Base Driver for the Intel(R) Ethernet Controller 800 Series +================================================================= Intel ice Linux driver. -Copyright(c) 2018 Intel Corporation. +Copyright(c) 2018-2021 Intel Corporation. Contents ======== -- Enabling the driver -- Support +- Overview +- Identifying Your Adapter +- Important Notes +- Additional Features & Configurations +- Performance Optimization -The driver in this release supports Intel's E800 Series of products. For -more information, visit Intel's support page at https://support.intel.com. -Enabling the driver -=================== -The driver is enabled via the standard kernel configuration system, -using the make command:: +The associated Virtual Function (VF) driver for this driver is iavf. - make oldconfig/menuconfig/etc. +Driver information can be obtained using ethtool and lspci. 
-The driver is located in the menu structure at: +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. All hardware requirements listed apply to use +with Linux. + +This driver supports XDP (Express Data Path) and AF_XDP zero-copy. Note that +XDP is blocked for frame sizes larger than 3KB. + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +https://www.intel.com/support + + +Important Notes +=============== + +Packet drops may occur under receive stress +------------------------------------------- +Devices based on the Intel(R) Ethernet Controller 800 Series are designed to +tolerate a limited amount of system latency during PCIe and DMA transactions. +If these transactions take longer than the tolerated latency, it can impact the +length of time the packets are buffered in the device and associated memory, +which may result in dropped packets. These packet drops typically do not have +a noticeable impact on throughput and performance under standard workloads. + +If these packet drops appear to affect your workload, the following may improve +the situation: + +1) Make sure that your system's physical memory is in a high-performance + configuration, as recommended by the platform vendor. A common + recommendation is for all channels to be populated with a single DIMM + module. +2) In your system's BIOS/UEFI settings, select the "Performance" profile. +3) Your distribution may provide tools like "tuned," which can help tweak + kernel settings to achieve better standard settings for different workloads. + + +Configuring SR-IOV for improved network security +------------------------------------------------ +In a virtualized environment, on Intel(R) Ethernet Network Adapters that +support SR-IOV, the virtual function (VF) may be subject to malicious behavior.
+Software-generated layer two frames, like IEEE 802.3x (link flow control), IEEE +802.1Qbb (priority based flow-control), and others of this type, are not +expected and can throttle traffic between the host and the virtual switch, +reducing performance. To resolve this issue, and to ensure isolation from +unintended traffic streams, configure all SR-IOV enabled ports for VLAN tagging +from the administrative interface on the PF. This configuration allows +unexpected, and potentially malicious, frames to be dropped. + +See "Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports" later in this +README for configuration instructions. + + +Do not unload port driver if VF with active VM is bound to it +------------------------------------------------------------- +Do not unload a port's driver if a Virtual Function (VF) with an active Virtual +Machine (VM) is bound to it. Doing so will cause the port to appear to hang. +Once the VM shuts down, or otherwise releases the VF, the command will +complete. + + +Important notes for SR-IOV and Link Aggregation +----------------------------------------------- +Link Aggregation is mutually exclusive with SR-IOV. + +- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF. +- If SR-IOV is active, you cannot set up Link Aggregation on the interface. + +Bridging and MACVLAN are also affected by this. If you wish to use bridging or +MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling +SR-IOV. If you are using bridging or MACVLAN in conjunction with SR-IOV, and +you want to remove the interface from the bridge or MACVLAN, you must follow +these steps: + +1. Destroy SR-IOV VFs if they exist +2. Remove the interface from the bridge or MACVLAN +3. 
Recreate SR-IOV VFs as needed + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. Download it at: +https://kernel.org/pub/software/network/ethtool/ + +NOTE: The rx_bytes value of ethtool does not match the rx_bytes value of +Netdev, due to the 4-byte CRC being stripped by the device. The difference +between the two rx_bytes values will be 4 x the number of Rx packets. For +example, if Rx packets are 10 and Netdev (software statistics) displays +rx_bytes as "X", then ethtool (hardware statistics) will display rx_bytes as +"X+40" (4 bytes CRC x 10 packets). + + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set the dmesg console log level to eight by entering the following:: + + # dmesg -n 8 + +NOTE: This setting is not saved across reboots. + + +Dynamic Device Personalization +------------------------------ +Dynamic Device Personalization (DDP) allows you to change the packet processing +pipeline of a device by applying a profile package to the device at runtime. +Profiles can be used to, for example, add support for new protocols, change +existing protocols, or change default settings. DDP profiles can also be rolled +back without rebooting the system. + +The DDP package loads during device initialization. The driver looks for +``intel/ice/ddp/ice.pkg`` in your firmware root (typically ``/lib/firmware/`` +or ``/lib/firmware/updates/``) and checks that it contains a valid DDP package +file. + +NOTE: Your distribution should likely have provided the latest DDP file, but if +ice.pkg is missing, you can find it in the linux-firmware repository or from +intel.com.
+ +If the driver is unable to load the DDP package, the device will enter Safe +Mode. Safe Mode disables advanced and performance features and supports only +basic traffic and minimal functionality, such as updating the NVM or +downloading a new driver or DDP package. Safe Mode only applies to the affected +physical function and does not impact any other PFs. See the "Intel(R) Ethernet +Adapters and Devices User Guide" for more details on DDP and Safe Mode. + +NOTES: + +- If you encounter issues with the DDP package file, you may need to download + an updated driver or DDP package file. See the log messages for more + information. + +- The ice.pkg file is a symbolic link to the default DDP package file. + +- You cannot update the DDP package if any PF drivers are already loaded. To + overwrite a package, unload all PFs and then reload the driver with the new + package. + +- Only the first loaded PF per device can download a package for that device. + +You can install specific DDP package files for different physical devices in +the same system. To install a specific DDP package file: + +1. Download the DDP package file you want for your device. + +2. Rename the file ice-xxxxxxxxxxxxxxxx.pkg, where 'xxxxxxxxxxxxxxxx' is the + unique 64-bit PCI Express device serial number (in hex) of the device you + want the package downloaded on. The filename must include the complete + serial number (including leading zeros) and be all lowercase. For example, + if the 64-bit serial number is b887a3ffffca0568, then the file name would be + ice-b887a3ffffca0568.pkg. + + To find the serial number from the PCI bus address, you can use the + following command:: + + # lspci -vv -s af:00.0 | grep -i Serial + Capabilities: [150 v1] Device Serial Number b8-87-a3-ff-ff-ca-05-68 + + You can use the following command to format the serial number without the + dashes:: + + # lspci -vv -s af:00.0 | grep -i Serial | awk '{print $7}' | sed s/-//g + b887a3ffffca0568 + +3. 
Copy the renamed DDP package file to + ``/lib/firmware/updates/intel/ice/ddp/``. If the directory does not yet + exist, create it before copying the file. + +4. Unload all of the PFs on the device. + +5. Reload the driver with the new package. + +NOTE: The presence of a device-specific DDP package file overrides the loading +of the default DDP package file (ice.pkg). + + +Intel(R) Ethernet Flow Director +------------------------------- +The Intel Ethernet Flow Director performs the following tasks: + +- Directs receive packets according to their flows to different queues +- Enables tight control on routing a flow in the platform +- Matches flows and CPU cores for flow affinity + +NOTE: This driver supports the following flow types: + +- IPv4 +- TCPv4 +- UDPv4 +- SCTPv4 +- IPv6 +- TCPv6 +- UDPv6 +- SCTPv6 + +Each flow type supports valid combinations of IP addresses (source or +destination) and UDP/TCP/SCTP ports (source and destination). You can supply +only a source IP address, a source IP address and a destination port, or any +combination of one or more of these four parameters. + +NOTE: This driver allows you to filter traffic based on a user-defined flexible +two-byte pattern and offset by using the ethtool user-def and mask fields. Only +L3 and L4 flow types are supported for user-defined flexible filters. For a +given flow type, you must clear all Intel Ethernet Flow Director filters before +changing the input set (for that flow type). + + +Flow Director Filters +--------------------- +Flow Director filters are used to direct traffic that matches specified +characteristics. They are enabled through ethtool's ntuple interface. To enable +or disable the Intel Ethernet Flow Director and these filters:: + + # ethtool -K <ethX> ntuple <off|on> + +NOTE: When you disable ntuple filters, all the user programmed filters are +flushed from the driver cache and hardware. All needed filters must be re-added +when ntuple is re-enabled. 
+ +To display all of the active filters:: + + # ethtool -u <ethX> + +To add a new filter:: + + # ethtool -U <ethX> flow-type <type> src-ip <ip> [m <ip_mask>] dst-ip <ip> + [m <ip_mask>] src-port <port> [m <port_mask>] dst-port <port> [m <port_mask>] + action <queue> + + Where: + <ethX> - the Ethernet device to program + <type> - can be ip4, tcp4, udp4, sctp4, ip6, tcp6, udp6, sctp6 + <ip> - the IP address to match on + <ip_mask> - the IPv4 address to mask on + NOTE: These filters use inverted masks. + <port> - the port number to match on + <port_mask> - the 16-bit integer for masking + NOTE: These filters use inverted masks. + <queue> - the queue to direct traffic toward (-1 discards the + matched traffic) + +To delete a filter:: + + # ethtool -U <ethX> delete <N> + + Where <N> is the filter ID displayed when printing all the active filters, + and may also have been specified using "loc <N>" when adding the filter. + +EXAMPLES: + +To add a filter that directs packet to queue 2:: + + # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 src-port 2000 dst-port 2001 action 2 [loc 1] + +To set a filter using only the source and destination IP address:: + + # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 action 2 [loc 1] + +To set a filter based on a user-defined pattern and offset:: + + # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.10.1 dst-ip \ + 192.168.10.2 user-def 0x4FFFF action 2 [loc 1] + + where the value of the user-def field contains the offset (4 bytes) and + the pattern (0xffff). 
+ +To match TCP traffic sent from 192.168.0.1, port 5300, directed to 192.168.0.5, +port 80, and then send it to queue 7:: + + # ethtool -U enp130s0 flow-type tcp4 src-ip 192.168.0.1 dst-ip 192.168.0.5 + src-port 5300 dst-port 80 action 7 + +To add a TCPv4 filter with a partial mask for a source IP subnet:: + + # ethtool -U <ethX> flow-type tcp4 src-ip 192.168.0.0 m 0.255.255.255 dst-ip + 192.168.5.12 src-port 12600 dst-port 31 action 12 + +NOTES: + +For each flow-type, the programmed filters must all have the same matching +input set. For example, issuing the following two commands is acceptable:: + + # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.5 src-port 55 action 10 + +Issuing the next two commands, however, is not acceptable, since the first +specifies src-ip and the second specifies dst-ip:: + + # ethtool -U enp130s0 flow-type ip4 src-ip 192.168.0.1 src-port 5300 action 7 + # ethtool -U enp130s0 flow-type ip4 dst-ip 192.168.0.5 src-port 55 action 10 + +The second command will fail with an error. You may program multiple filters +with the same fields, using different values, but, on one device, you may not +program two tcp4 filters with different matching fields. + +The ice driver does not support matching on a subportion of a field, thus +partial mask fields are not supported. + + +Flex Byte Flow Director Filters +------------------------------- +The driver also supports matching user-defined data within the packet payload. +This flexible data is specified using the "user-def" field of the ethtool +command in the following way: + +.. table:: + + ============================== ============================ + ``31 28 24 20 16`` ``15 12 8 4 0`` + ``offset into packet payload`` ``2 bytes of flexible data`` + ============================== ============================ + +For example, + +:: + + ... user-def 0x4FFFF ... 
+ +tells the filter to look 4 bytes into the payload and match that value against +0xFFFF. The offset is based on the beginning of the payload, and not the +beginning of the packet. Thus + +:: + + flow-type tcp4 ... user-def 0x8BEAF ... + +would match TCP/IPv4 packets which have the value 0xBEAF 8 bytes into the +TCP/IPv4 payload. + +Note that ICMP headers are parsed as 4 bytes of header and 4 bytes of payload. +Thus to match the first byte of the payload, you must actually add 4 bytes to +the offset. Also note that ip4 filters match both ICMP frames as well as raw +(unknown) ip4 frames, where the payload will be the L3 payload of the IP4 +frame. + +The maximum offset is 64. The hardware will only read up to 64 bytes of data +from the payload. The offset must be even because the flexible data is 2 bytes +long and must be aligned to byte 0 of the packet payload. + +The user-defined flexible offset is also considered part of the input set and +cannot be programmed separately for multiple filters of the same type. However, +the flexible data is not part of the input set and multiple filters may use the +same offset but match against different data. + + +RSS Hash Flow +------------- +Allows you to set the hash bytes per flow type and any combination of one or +more options for Receive Side Scaling (RSS) hash byte configuration. + +:: + + # ethtool -N <ethX> rx-flow-hash <type> <option> + + Where <type> is: + tcp4 signifying TCP over IPv4 + udp4 signifying UDP over IPv4 + tcp6 signifying TCP over IPv6 + udp6 signifying UDP over IPv6 + And <option> is one or more of: + s Hash on the IP source address of the Rx packet. + d Hash on the IP destination address of the Rx packet. + f Hash on bytes 0 and 1 of the Layer 4 header of the Rx packet. + n Hash on bytes 2 and 3 of the Layer 4 header of the Rx packet. 
+ + +Accelerated Receive Flow Steering (aRFS) +---------------------------------------- +Devices based on the Intel(R) Ethernet Controller 800 Series support +Accelerated Receive Flow Steering (aRFS) on the PF. aRFS is a load-balancing +mechanism that allows you to direct packets to the same CPU where an +application is running or consuming the packets in that flow. + +NOTES: + +- aRFS requires that ntuple filtering is enabled via ethtool. +- aRFS support is limited to the following packet types: + + - TCP over IPv4 and IPv6 + - UDP over IPv4 and IPv6 + - Nonfragmented packets + +- aRFS only supports Flow Director filters, which consist of the + source/destination IP addresses and source/destination ports. +- aRFS and ethtool's ntuple interface both use the device's Flow Director. aRFS + and ntuple features can coexist, but you may encounter unexpected results if + there's a conflict between aRFS and ntuple requests. See "Intel(R) Ethernet + Flow Director" for additional information. + +To set up aRFS: + +1. Enable the Intel Ethernet Flow Director and ntuple filters using ethtool. + +:: + + # ethtool -K <ethX> ntuple on + +2. Set up the number of entries in the global flow table. For example: + +:: + + # NUM_RPS_ENTRIES=16384 + # echo $NUM_RPS_ENTRIES > /proc/sys/net/core/rps_sock_flow_entries + +3. Set up the number of entries in the per-queue flow table. For example: + +:: + + # NUM_RX_QUEUES=64 + # for file in /sys/class/net/$IFACE/queues/rx-*/rps_flow_cnt; do + # echo $(($NUM_RPS_ENTRIES/$NUM_RX_QUEUES)) > $file; + # done + +4. Disable the IRQ balance daemon (this is only a temporary stop of the service + until the next reboot). + +:: + + # systemctl stop irqbalance + +5. Configure the interrupt affinity. + + See ``/Documentation/core-api/irq/irq-affinity.rst`` + + +To disable aRFS using ethtool:: + + # ethtool -K <ethX> ntuple off + +NOTE: This command will disable ntuple filters and clear any aRFS filters in +software and hardware. + +Example Use Case: + +1. 
Set the server application on the desired CPU (e.g., CPU 4). + +:: + + # taskset -c 4 netserver + +2. Use netperf to route traffic from the client to CPU 4 on the server with + aRFS configured. This example uses TCP over IPv4. + +:: + + # netperf -H <Host IPv4 Address> -t TCP_STREAM + + +Enabling Virtual Functions (VFs) +-------------------------------- +Use sysfs to enable virtual functions (VF). + +For example, you can create 4 VFs as follows:: + + # echo 4 > /sys/class/net/<ethX>/device/sriov_numvfs + +To disable VFs, write 0 to the same file:: + + # echo 0 > /sys/class/net/<ethX>/device/sriov_numvfs + +The maximum number of VFs for the ice driver is 256 total (all ports). To check +how many VFs each PF supports, use the following command:: + + # cat /sys/class/net/<ethX>/device/sriov_totalvfs + +Note: You cannot use SR-IOV when link aggregation (LAG)/bonding is active, and +vice versa. To enforce this, the driver checks for this mutual exclusion. + + +Displaying VF Statistics on the PF +---------------------------------- +Use the following command to display the statistics for the PF and its VFs:: + + # ip -s link show dev <ethX> + +NOTE: The output of this command can be very large due to the maximum number of +possible VFs. + +The PF driver will display a subset of the statistics for the PF and for all +VFs that are configured. The PF will always print a statistics block for each +of the possible VFs, and it will show zero for all unconfigured VFs. + + +Configuring VLAN Tagging on SR-IOV Enabled Adapter Ports +-------------------------------------------------------- +To configure VLAN tagging for the ports on an SR-IOV enabled adapter, use the +following command. The VLAN configuration should be done before the VF driver +is loaded or the VM is booted. The VF is not aware of the VLAN tag being +inserted on transmit and removed on received frames (sometimes called "port +VLAN" mode). 
+ +:: + + # ip link set dev <ethX> vf <id> vlan <vlan id> + +For example, the following will configure PF eth0 and the first VF on VLAN 10:: + + # ip link set dev eth0 vf 0 vlan 10 + + +Enabling a VF link if the port is disconnected +---------------------------------------------- +If the physical function (PF) link is down, you can force link up (from the +host PF) on any virtual functions (VF) bound to the PF. + +For example, to force link up on VF 0 bound to PF eth0:: + + # ip link set eth0 vf 0 state enable + +Note: If the command does not work, it may not be supported by your system. + + +Setting the MAC Address for a VF +-------------------------------- +To change the MAC address for the specified VF:: + + # ip link set <ethX> vf 0 mac <address> + +For example:: + + # ip link set <ethX> vf 0 mac 00:01:02:03:04:05 + +This setting lasts until the PF is reloaded. + +NOTE: Assigning a MAC address for a VF from the host will disable any +subsequent requests to change the MAC address from within the VM. This is a +security feature. The VM is not aware of this restriction, so if this is +attempted in the VM, it will trigger MDD events. + + +Trusted VFs and VF Promiscuous Mode +----------------------------------- +This feature allows you to designate a particular VF as trusted and allows that +trusted VF to request selective promiscuous mode on the Physical Function (PF). + +To set a VF as trusted or untrusted, enter the following command in the +Hypervisor:: + + # ip link set dev <ethX> vf 1 trust [on|off] + +NOTE: It's important to set the VF to trusted before setting promiscuous mode. +If the VM is not trusted, the PF will ignore promiscuous mode requests from the +VF. If the VM becomes trusted after the VF driver is loaded, you must make a +new request to set the VF to promiscuous. + +Once the VF is designated as trusted, use the following commands in the VM to +set the VF to promiscuous mode. 
+ +For promiscuous all:: + + # ip link set <ethX> promisc on + Where <ethX> is a VF interface in the VM + +For promiscuous Multicast:: + + # ip link set <ethX> allmulticast on + Where <ethX> is a VF interface in the VM + +NOTE: By default, the ethtool private flag vf-true-promisc-support is set to +"off," meaning that promiscuous mode for the VF will be limited. To set the +promiscuous mode for the VF to true promiscuous and allow the VF to see all +ingress traffic, use the following command:: + + # ethtool --set-priv-flags <ethX> vf-true-promisc-support on + +The vf-true-promisc-support private flag does not enable promiscuous mode; +rather, it designates which type of promiscuous mode (limited or true) you will +get when you enable promiscuous mode using the ip link commands above. Note +that this is a global setting that affects the entire device. However, the +vf-true-promisc-support private flag is only exposed to the first PF of the +device. The PF remains in limited promiscuous mode regardless of the +vf-true-promisc-support setting. + +Next, add a VLAN interface on the VF interface. For example:: + + # ip link add link eth2 name eth2.100 type vlan id 100 + +Note that the order in which you set the VF to promiscuous mode and add the +VLAN interface does not matter (you can do either first). The result in this +example is that the VF will get all traffic that is tagged with VLAN 100. + + +Malicious Driver Detection (MDD) for VFs +---------------------------------------- +Some Intel Ethernet devices use Malicious Driver Detection (MDD) to detect +malicious traffic from the VF and disable Tx/Rx queues or drop the offending +packet until a VF driver reset occurs. You can view MDD messages in the PF's +system log using the dmesg command. + +- If the PF driver logs MDD events from the VF, confirm that the correct VF + driver is installed. +- To restore functionality, you can manually reload the VF or VM or enable + automatic VF resets. 
+- When automatic VF resets are enabled, the PF driver will immediately reset + the VF and reenable queues when it detects MDD events on the receive path. +- If automatic VF resets are disabled, the PF will not automatically reset the + VF when it detects MDD events. + +To enable or disable automatic VF resets, use the following command:: + + # ethtool --set-priv-flags <ethX> mdd-auto-reset-vf on|off + + +MAC and VLAN Anti-Spoofing Feature for VFs +------------------------------------------ +When a malicious driver on a Virtual Function (VF) interface attempts to send a +spoofed packet, it is dropped by the hardware and not transmitted. + +NOTE: This feature can be disabled for a specific VF:: + + # ip link set <ethX> vf <vf id> spoofchk {off|on} + + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ifconfig command to increase the MTU size. For example, enter the +following where <ethX> is the interface number:: + + # ifconfig <ethX> mtu 9000 up + +Alternatively, you can use the ip command as follows:: + + # ip link set mtu 9000 dev <ethX> + # ip link set up dev <ethX> + +This setting is not saved across reboots. + + +NOTE: The maximum MTU setting for jumbo frames is 9702. This corresponds to the +maximum jumbo frame size of 9728 bytes. + +NOTE: This driver will attempt to use multiple page sized buffers to receive +each jumbo packet. This should help to avoid buffer starvation issues when +allocating receive packets. + +NOTE: Packet loss may have a greater impact on throughput when you use jumbo +frames. If you observe a drop in performance after enabling jumbo frames, +enabling flow control may mitigate the issue. + + +Speed and Duplex Configuration +------------------------------ +In addressing speed and duplex configuration issues, you need to distinguish +between copper-based adapters and fiber-based adapters. 
+ +In the default mode, an Intel(R) Ethernet Network Adapter using copper +connections will attempt to auto-negotiate with its link partner to determine +the best setting. If the adapter cannot establish link with the link partner +using auto-negotiation, you may need to manually configure the adapter and link +partner to identical settings to establish link and pass packets. This should +only be needed when attempting to link with an older switch that does not +support auto-negotiation or one that has been forced to a specific speed or +duplex mode. Your link partner must match the setting you choose. 1 Gbps speeds +and higher cannot be forced. Use the autonegotiation advertising setting to +manually set devices for 1 Gbps and higher. + +Speed, duplex, and autonegotiation advertising are configured through the +ethtool utility. For the latest version, download and install ethtool from the +following website: + + https://kernel.org/pub/software/network/ethtool/ + +To see the speed configurations your device supports, run the following:: + + # ethtool <ethX> + +Caution: Only experienced network administrators should force speed and duplex +or change autonegotiation advertising manually. The settings at the switch must +always match the adapter settings. Adapter performance may suffer or your +adapter may not operate if you configure the adapter differently from your +switch. + + +Data Center Bridging (DCB) +-------------------------- +NOTE: The kernel assumes that TC0 is available, and will disable Priority Flow +Control (PFC) on the device if TC0 is not available. To fix this, ensure TC0 is +enabled when setting up DCB on your switch. + +DCB is a configuration Quality of Service implementation in hardware. It uses +the VLAN priority tag (802.1p) to filter traffic. That means that there are 8 +different priorities that traffic can be filtered into. 
It also enables +priority flow control (802.1Qbb) which can limit or eliminate the number of +dropped packets during network stress. Bandwidth can be allocated to each of +these priorities, which is enforced at the hardware level (802.1Qaz). + +DCB is normally configured on the network using the DCBX protocol (802.1Qaz), a +specialization of LLDP (802.1AB). The ice driver supports the following +mutually exclusive variants of DCBX support: + +1) Firmware-based LLDP Agent +2) Software-based LLDP Agent + +In firmware-based mode, firmware intercepts all LLDP traffic and handles DCBX +negotiation transparently for the user. In this mode, the adapter operates in +"willing" DCBX mode, receiving DCB settings from the link partner (typically a +switch). The local user can only query the negotiated DCB configuration. For +information on configuring DCBX parameters on a switch, please consult the +switch manufacturer's documentation. + +In software-based mode, LLDP traffic is forwarded to the network stack and user +space, where a software agent can handle it. In this mode, the adapter can +operate in either "willing" or "nonwilling" DCBX mode and DCB configuration can +be both queried and set locally. This mode requires the FW-based LLDP Agent to +be disabled. + +NOTE: + +- You can enable and disable the firmware-based LLDP Agent using an ethtool + private flag. Refer to the "FW-LLDP (Firmware Link Layer Discovery Protocol)" + section in this README for more information. +- In software-based DCBX mode, you can configure DCB parameters using software + LLDP/DCBX agents that interface with the Linux kernel's DCB Netlink API. We + recommend using OpenLLDP as the DCBX agent when running in software mode. For + more information, see the OpenLLDP man pages and + https://github.com/intel/openlldp. +- The driver implements the DCB netlink interface layer to allow the user space + to communicate with the driver and query DCB configuration for the port. 
+- iSCSI with DCB is not supported. + + +FW-LLDP (Firmware Link Layer Discovery Protocol) +------------------------------------------------ +Use ethtool to change FW-LLDP settings. The FW-LLDP setting is per port and +persists across boots. + +To enable LLDP:: + + # ethtool --set-priv-flags <ethX> fw-lldp-agent on + +To disable LLDP:: + + # ethtool --set-priv-flags <ethX> fw-lldp-agent off + +To check the current LLDP setting:: + + # ethtool --show-priv-flags <ethX> + +NOTE: You must enable the UEFI HII "LLDP Agent" attribute for this setting to +take effect. If "LLDP AGENT" is set to disabled, you cannot enable it from the +OS. + + +Flow Control +------------ +Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable +receiving and transmitting pause frames for ice. When transmit is enabled, +pause frames are generated when the receive packet buffer crosses a predefined +threshold. When receive is enabled, the transmit unit will halt for the time +delay specified when a pause frame is received. + +NOTE: You must have a flow control capable link partner. + +Flow Control is disabled by default. + +Use ethtool to change the flow control settings. + +To enable or disable Rx or Tx Flow Control:: + + # ethtool -A <ethX> rx <on|off> tx <on|off> + +Note: This command only enables or disables Flow Control if auto-negotiation is +disabled. If auto-negotiation is enabled, this command changes the parameters +used for auto-negotiation with the link partner. + +Note: Flow Control auto-negotiation is part of link auto-negotiation. Depending +on your device, you may not be able to change the auto-negotiation setting. + +NOTE: + +- The ice driver requires flow control on both the port and link partner. If + flow control is disabled on one of the sides, the port may appear to hang on + heavy traffic. +- You may encounter issues with link-level flow control (LFC) after disabling + DCB. The LFC status may show as enabled but traffic is not paused. 
To resolve + this issue, disable and reenable LFC using ethtool:: + + # ethtool -A <ethX> rx off tx off + # ethtool -A <ethX> rx on tx on + + +NAPI +---- +This driver supports NAPI (Rx polling mode). +For more information on NAPI, see +https://www.linuxfoundation.org/collaborate/workgroups/networking/napi + + +MACVLAN +------- +This driver supports MACVLAN. Kernel support for MACVLAN can be tested by +checking if the MACVLAN driver is loaded. You can run 'lsmod | grep macvlan' to +see if the MACVLAN driver is loaded or run 'modprobe macvlan' to try to load +the MACVLAN driver. + +NOTE: + +- In passthru mode, you can only set up one MACVLAN device. It will inherit the + MAC address of the underlying PF (Physical Function) device. + + +IEEE 802.1ad (QinQ) Support +--------------------------- +The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN +IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as +"tags," and multiple VLAN IDs are thus referred to as a "tag stack." Tag stacks +allow L2 tunneling and the ability to segregate traffic within a particular +VLAN ID, among other uses. + +NOTES: + +- Receive checksum offloads and VLAN acceleration are not supported for 802.1ad + (QinQ) packets. + +- 0x88A8 traffic will not be received unless VLAN stripping is disabled with + the following command:: + + # ethtool -K <ethX> rxvlan off + +- 0x88A8/0x8100 double VLANs cannot be used with 0x8100 or 0x8100/0x8100 VLANS + configured on the same port. 0x88a8/0x8100 traffic will not be received if + 0x8100 VLANs are configured. + +- The VF can only transmit 0x88A8/0x8100 (i.e., 802.1ad/802.1Q) traffic if: + + 1) The VF is not assigned a port VLAN. + 2) spoofchk is disabled from the PF. If you enable spoofchk, the VF will + not transmit 0x88A8/0x8100 traffic.
+ +- The VF may not receive all network traffic based on the Inner VLAN header + when VF true promiscuous mode (vf-true-promisc-support) and double VLANs are + enabled in SR-IOV mode. + +The following are examples of how to configure 802.1ad (QinQ):: + + # ip link add link eth0 eth0.24 type vlan proto 802.1ad id 24 + # ip link add link eth0.24 eth0.24.371 type vlan proto 802.1Q id 371 + + Where "24" and "371" are example VLAN IDs. + + +Tunnel/Overlay Stateless Offloads +--------------------------------- +Supported tunnels and overlays include VXLAN, GENEVE, and others depending on +hardware and software configuration. Stateless offloads are enabled by default. + +To view the current state of all offloads:: + + # ethtool -k <ethX> + + +UDP Segmentation Offload +------------------------ +Allows the adapter to offload transmit segmentation of UDP packets with +payloads up to 64K into valid Ethernet frames. Because the adapter hardware is +able to complete data segmentation much faster than operating system software, +this feature may improve transmission performance. +In addition, the adapter may use fewer CPU resources. + +NOTE: + +- The application sending UDP packets must support UDP segmentation offload. + +To enable/disable UDP Segmentation Offload, issue the following command:: + + # ethtool -K <ethX> tx-udp-segmentation [off|on] + + +Performance Optimization +======================== +Driver defaults are meant to fit a wide variety of workloads, but if further +optimization is required, we recommend experimenting with the following +settings. + + +Rx Descriptor Ring Size +----------------------- +To reduce the number of Rx packet discards, increase the number of Rx +descriptors for each Rx ring using ethtool. 
+ + Check if the interface is dropping Rx packets due to buffers being full + (rx_dropped.nic can mean that there is no PCIe bandwidth):: + + # ethtool -S <ethX> | grep "rx_dropped" + + If the previous command shows drops on queues, it may help to increase + the number of descriptors using 'ethtool -G':: + + # ethtool -G <ethX> rx <N> + Where <N> is the desired number of ring entries/descriptors + + This can provide temporary buffering for issues that create latency while + the CPUs process descriptors. + + +Interrupt Rate Limiting +----------------------- +This driver supports an adaptive interrupt throttle rate (ITR) mechanism that +is tuned for general workloads. The user can customize the interrupt rate +control for specific workloads, via ethtool, adjusting the number of +microseconds between interrupts. + +To set the interrupt rate manually, you must disable adaptive mode:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off + +For lower CPU utilization: + + Disable adaptive ITR and lower Rx and Tx interrupts. The examples below + affect every queue of the specified interface. + + Setting rx-usecs and tx-usecs to 80 will limit interrupts to about + 12,500 interrupts per second per queue:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 80 tx-usecs 80 + +For reduced latency: + + Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0 + using ethtool:: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs 0 tx-usecs 0 + +Per-queue interrupt rate settings: + + The following examples are for queues 1 and 3, but you can adjust other + queues. 
+ + To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or + about 100,000 interrupts/second, for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --coalesce adaptive-rx off + rx-usecs 10 + + To show the current coalesce settings for queues 1 and 3:: + + # ethtool --per-queue <ethX> queue_mask 0xa --show-coalesce + +Bounding interrupt rates using rx-usecs-high: + + :Valid Range: 0-236 (0=no limit) + + The range of 0-236 microseconds provides an effective range of 4,237 to + 250,000 interrupts per second. The value of rx-usecs-high can be set + independently of rx-usecs and tx-usecs in the same ethtool command, and is + also independent of the adaptive interrupt moderation algorithm. The + underlying hardware supports granularity in 4-microsecond intervals, so + adjacent values may result in the same interrupt rate. + + The following command would disable adaptive interrupt moderation, and allow + a maximum of 5 microseconds before indicating a receive or transmit was + complete. However, instead of resulting in as many as 200,000 interrupts per + second, it limits total interrupts per second to 50,000 via the rx-usecs-high + parameter. + + :: + + # ethtool -C <ethX> adaptive-rx off adaptive-tx off rx-usecs-high 20 + rx-usecs 5 tx-usecs 5 + + +Virtualized Environments +------------------------ +In addition to the other suggestions in this section, the following may be +helpful to optimize performance in VMs. + + Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to + individual LCPUs, making sure to use a set of CPUs included in the + device's local_cpulist: ``/sys/class/net/<ethX>/device/local_cpulist``. + + Configure as many Rx/Tx queues in the VM as available. (See the iavf driver + documentation for the number of queues supported.) 
For example:: + + # ethtool -L <virt_interface> rx <max> tx <max> - -> Device Drivers - -> Network device support (NETDEVICES [=y]) - -> Ethernet driver support - -> Intel devices - -> Intel(R) Ethernet Connection E800 Series Support Support ======= For general information, go to the Intel support website at: - https://www.intel.com/support/ or the Intel Wired Networking project hosted by Sourceforge at: - https://sourceforge.net/projects/e1000 If an issue is identified with the released source code on a supported kernel with a supported adapter, email the specific information related to the issue to e1000-devel@lists.sf.net. + + +Trademarks +========== +Intel is a trademark or registered trademark of Intel Corporation or its +subsidiaries in the United States and/or other countries. + +* Other names and brands may be claimed as the property of others. diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index a1b32fcd0d76..1b7e32d8a61b 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -13,12 +13,12 @@ Contents - `Devlink info`_ - `Devlink parameters`_ - `mlx5 subfunction`_ -- `mlx5 port function`_ +- `mlx5 function attributes`_ - `Devlink health reporters`_ - `mlx5 tracepoints`_ Enabling the driver and kconfig options -================================================ +======================================= | mlx5 core is modular and most of the major mlx5 core driver features can be selected (compiled in/out) | at build time via kernel Kconfig flags. 
diff --git a/Documentation/networking/device_drivers/ethernet/ti/am65_nuss_cpsw_switchdev.rst b/Documentation/networking/device_drivers/ethernet/ti/am65_nuss_cpsw_switchdev.rst new file mode 100644 index 000000000000..f24adfab6a1b --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/ti/am65_nuss_cpsw_switchdev.rst @@ -0,0 +1,143 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================================================== +Texas Instruments K3 AM65 CPSW NUSS switchdev based ethernet driver +=================================================================== + +:Version: 1.0 + +Port renaming +============= + +In order to rename via udev:: + + ip -d link show dev sw0p1 | grep switchid + + SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}==<switchid>, \ + ATTR{phys_port_name}!="", NAME="sw0$attr{phys_port_name}" + + +Multi mac mode +============== + +- The driver is operating in multi-mac mode by default, thus + working as N individual network interfaces. + +Devlink configuration parameters +================================ + +See Documentation/networking/devlink/am65-nuss-cpsw-switch.rst + +Enabling "switch" +================= + +The Switch mode can be enabled by configuring devlink driver parameter +"switch_mode" to 1/true:: + + devlink dev param set platform/c000000.ethernet \ + name switch_mode value true cmode runtime + +This can be done regardless of the state of Port's netdev devices - UP/DOWN, but +Port's netdev devices have to be in UP before joining to the bridge to avoid +overwriting of bridge configuration as CPSW switch driver completely reloads its +configuration when first port changes its state to UP. + +When both interfaces have joined the bridge, the CPSW switch driver will enable +marking packets with offload_fwd_mark flag. + +All configuration is implemented via switchdev API.
+ +Bridge setup +============ + +:: + + devlink dev param set platform/c000000.ethernet \ + name switch_mode value true cmode runtime + + ip link add name br0 type bridge + ip link set dev br0 type bridge ageing_time 1000 + ip link set dev sw0p1 up + ip link set dev sw0p2 up + ip link set dev sw0p1 master br0 + ip link set dev sw0p2 master br0 + + [*] bridge vlan add dev br0 vid 1 pvid untagged self + + [*] if vlan_filtering=1. where default_pvid=1 + + Note. Steps [*] are mandatory. + + +On/off STP +========== + +:: + + ip link set dev BRDEV type bridge stp_state 1/0 + +VLAN configuration +================== + +:: + + bridge vlan add dev br0 vid 1 pvid untagged self <---- add cpu port to VLAN 1 + +Note. This step is mandatory for bridge/default_pvid. + +Add extra VLANs +=============== + + 1. untagged:: + + bridge vlan add dev sw0p1 vid 100 pvid untagged master + bridge vlan add dev sw0p2 vid 100 pvid untagged master + bridge vlan add dev br0 vid 100 pvid untagged self <---- Add cpu port to VLAN100 + + 2. 
tagged:: + + bridge vlan add dev sw0p1 vid 100 master + bridge vlan add dev sw0p2 vid 100 master + bridge vlan add dev br0 vid 100 pvid tagged self <---- Add cpu port to VLAN100 + +FDBs +---- + +FDBs are automatically added on the appropriate switch port upon detection + +Manually adding FDBs:: + + bridge fdb add aa:bb:cc:dd:ee:ff dev sw0p1 master vlan 100 + bridge fdb add aa:bb:cc:dd:ee:fe dev sw0p2 master <---- Add on all VLANs + +MDBs +---- + +MDBs are automatically added on the appropriate switch port upon detection + +Manually adding MDBs:: + + bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent vid 100 + bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent <---- Add on all VLANs + +Multicast flooding +================== +CPU port mcast_flooding is always on + +Turning flooding on/off on switch ports: +bridge link set dev sw0p1 mcast_flood on/off + +Access and Trunk port +===================== + +:: + + bridge vlan add dev sw0p1 vid 100 pvid untagged master + bridge vlan add dev sw0p2 vid 100 master + + + bridge vlan add dev br0 vid 100 self + ip link add link br0 name br0.100 type vlan id 100 + +Note. Setting PVID on Bridge device itself works only for +default VLAN (default_pvid). diff --git a/Documentation/networking/devlink/am65-nuss-cpsw-switch.rst b/Documentation/networking/devlink/am65-nuss-cpsw-switch.rst new file mode 100644 index 000000000000..1e589c26abff --- /dev/null +++ b/Documentation/networking/devlink/am65-nuss-cpsw-switch.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +am65-cpsw-nuss devlink support +============================== + +This document describes the devlink features implemented by the ``am65-cpsw-nuss`` +device driver. + +Parameters +========== + +The ``am65-cpsw-nuss`` driver implements the following driver-specific +parameters. + +..
list-table:: Driver-specific parameters implemented + :widths: 5 5 5 85 + + * - Name + - Type + - Mode + - Description + * - ``switch_mode`` + - Boolean + - runtime + - Enable switch mode diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index aab79667f97b..8428a1220723 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -45,3 +45,4 @@ parameters, info versions, and other features it supports. sja1105 qed ti-cpsw-switch + am65-nuss-cpsw-switch diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 61a358301f12..581bfce86dca 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -180,7 +180,7 @@ min_adv_mss - INTEGER fib_notify_on_flag_change - INTEGER Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/ - RTM_F_TRAP flags are changed. + RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed. After installing a route to the kernel, user space receives an acknowledgment, which means the route was installed in the kernel, @@ -197,6 +197,7 @@ fib_notify_on_flag_change - INTEGER - 0 - Do not emit notifications. - 1 - Emit notifications. + - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change. IP Fragmentation: @@ -1445,6 +1446,25 @@ rp_filter - INTEGER Default value is 0. Note that some distributions enable it in startup scripts. +src_valid_mark - BOOLEAN + - 0 - The fwmark of the packet is not included in reverse path + route lookup. This allows for asymmetric routing configurations + utilizing the fwmark in only one direction, e.g., transparent + proxying. + + - 1 - The fwmark of the packet is included in reverse path route + lookup. This permits rp_filter to function when the fwmark is + used for routing traffic in both directions. 
+ + This setting also affects the utilization of fwmark when + performing source address selection for ICMP replies, or + determining addresses stored for the IPOPT_TS_TSANDADDR and + IPOPT_RR IP options. + + The max value from conf/{all,interface}/src_valid_mark is used. + + Default value is 0. + arp_filter - BOOLEAN - 1 - Allows you to have multiple network interfaces on the same subnet, and have the ARPs for each interface be answered @@ -1797,7 +1817,7 @@ nexthop_compat_mode - BOOLEAN fib_notify_on_flag_change - INTEGER Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/ - RTM_F_TRAP flags are changed. + RTM_F_TRAP/RTM_F_OFFLOAD_FAILED flags are changed. After installing a route to the kernel, user space receives an acknowledgment, which means the route was installed in the kernel, @@ -1814,6 +1834,7 @@ fib_notify_on_flag_change - INTEGER - 0 - Do not emit notifications. - 1 - Emit notifications. + - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change. IPv6 Fragmentation: diff --git a/Documentation/networking/netdev-features.rst b/Documentation/networking/netdev-features.rst index a2d7d7160e39..d7b15bb64deb 100644 --- a/Documentation/networking/netdev-features.rst +++ b/Documentation/networking/netdev-features.rst @@ -182,3 +182,24 @@ stricter than Hardware LRO. A packet stream merged by Hardware GRO must be re-segmentable by GSO or TSO back to the exact original packet stream. Hardware GRO is dependent on RXCSUM since every packet successfully merged by hardware must also have the checksum verified by hardware. + +* hsr-tag-ins-offload + +This should be set for devices which insert an HSR (High-availability Seamless +Redundancy) or PRP (Parallel Redundancy Protocol) tag automatically. + +* hsr-tag-rm-offload + +This should be set for devices which remove HSR (High-availability Seamless +Redundancy) or PRP (Parallel Redundancy Protocol) tags automatically. 
+ +* hsr-fwd-offload + +This should be set for devices which forward HSR (High-availability Seamless +Redundancy) frames from one port to another in hardware. + +* hsr-dup-offload + +This should be set for devices which duplicate outgoing HSR (High-availability +Seamless Redundancy) or PRP (Parallel Redundancy Protocol) frames +automatically in hardware. diff --git a/Documentation/virt/kvm/nested-vmx.rst b/Documentation/virt/kvm/nested-vmx.rst index 6ab4e35cee23..ac2095d41f02 100644 --- a/Documentation/virt/kvm/nested-vmx.rst +++ b/Documentation/virt/kvm/nested-vmx.rst @@ -37,8 +37,10 @@ call L2. Running nested VMX ------------------ -The nested VMX feature is disabled by default. It can be enabled by giving -the "nested=1" option to the kvm-intel module. +The nested VMX feature is enabled by default since Linux kernel v4.20. For +older Linux kernels, it can be enabled by giving the "nested=1" option to the +kvm-intel module. + No modifications are required to user space (qemu). However, qemu's default emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be diff --git a/Documentation/virt/kvm/running-nested-guests.rst b/Documentation/virt/kvm/running-nested-guests.rst index d0a1fc754c84..bd70c69468ae 100644 --- a/Documentation/virt/kvm/running-nested-guests.rst +++ b/Documentation/virt/kvm/running-nested-guests.rst @@ -74,7 +74,7 @@ few: Enabling "nested" (x86) ----------------------- -From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled +From Linux kernel v4.20 onwards, the ``nested`` KVM parameter is enabled by default for Intel and AMD. (Though your Linux distribution might override this default.) 
diff --git a/MAINTAINERS b/MAINTAINERS index d1b0057a9797..99335fd22c0a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2616,8 +2616,8 @@ S: Maintained F: drivers/power/reset/keystone-reset.c ARM/TEXAS INSTRUMENTS K3 ARCHITECTURE -M: Tero Kristo <t-kristo@ti.com> M: Nishanth Menon <nm@ti.com> +M: Tero Kristo <kristo@kernel.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/arm/ti/k3.yaml @@ -3445,6 +3445,15 @@ F: Documentation/devicetree/bindings/mips/brcm/ F: arch/mips/bcm47xx/* F: arch/mips/include/asm/mach-bcm47xx/* +BROADCOM BCM4908 ETHERNET DRIVER +M: Rafał Miłecki <rafal@milecki.pl> +M: bcm-kernel-feedback-list@broadcom.com +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml +F: drivers/net/ethernet/broadcom/bcm4908_enet.* +F: drivers/net/ethernet/broadcom/unimac.h + BROADCOM BCM5301X ARM ARCHITECTURE M: Hauke Mehrtens <hauke@hauke-m.de> M: Rafał Miłecki <zajec5@gmail.com> @@ -4327,7 +4336,7 @@ S: Maintained F: .clang-format CLANG/LLVM BUILD SUPPORT -M: Nathan Chancellor <natechancellor@gmail.com> +M: Nathan Chancellor <nathan@kernel.org> M: Nick Desaulniers <ndesaulniers@google.com> L: clang-built-linux@googlegroups.com S: Supported @@ -6497,9 +6506,9 @@ S: Maintained F: drivers/edac/skx_*.[ch] EDAC-TI -M: Tero Kristo <t-kristo@ti.com> +M: Tero Kristo <kristo@kernel.org> L: linux-edac@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/edac/ti_edac.c EDIROL UA-101/UA-1000 DRIVER @@ -9582,7 +9591,7 @@ F: Documentation/hwmon/k8temp.rst F: drivers/hwmon/k8temp.c KASAN -M: Andrey Ryabinin <aryabinin@virtuozzo.com> +M: Andrey Ryabinin <ryabinin.a.a@gmail.com> R: Alexander Potapenko <glider@google.com> R: Dmitry Vyukov <dvyukov@google.com> L: kasan-dev@googlegroups.com @@ -10710,6 +10719,8 @@ M: Sunil Goutham <sgoutham@marvell.com> M: Linu Cherian <lcherian@marvell.com> M: Geetha sowjanya <gakula@marvell.com> M: Jerin Jacob 
<jerinj@marvell.com> +M: hariprasad <hkelam@marvell.com> +M: Subbaraya Sundeep <sbhatta@marvell.com> L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -17604,7 +17615,7 @@ F: include/linux/dma/k3-psil.h TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER M: Nishanth Menon <nm@ti.com> -M: Tero Kristo <t-kristo@ti.com> +M: Tero Kristo <kristo@kernel.org> M: Santosh Shilimkar <ssantosh@kernel.org> L: linux-arm-kernel@lists.infradead.org S: Maintained @@ -17748,9 +17759,9 @@ S: Maintained F: drivers/clk/clk-cdce706.c TI CLOCK DRIVER -M: Tero Kristo <t-kristo@ti.com> +M: Tero Kristo <kristo@kernel.org> L: linux-omap@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/clk/ti/ F: include/linux/clk/ti.h @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION* @@ -452,7 +452,6 @@ AWK = awk INSTALLKERNEL := installkernel DEPMOD = depmod PERL = perl -PYTHON = python PYTHON3 = python3 CHECK = sparse BASH = bash @@ -508,7 +507,7 @@ CLANG_FLAGS := export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL -export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX +export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE @@ -813,10 +812,12 @@ KBUILD_CFLAGS += -ftrivial-auto-var-init=zero KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-from-clang endif +DEBUG_CFLAGS := + # Workaround for GCC versions < 5.0 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 ifdef CONFIG_CC_IS_GCC -DEBUG_CFLAGS := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) +DEBUG_CFLAGS += $(call cc-ifversion, -lt, 0500, 
$(call cc-option, -fno-var-tracking-assignments)) endif ifdef CONFIG_DEBUG_INFO @@ -949,12 +950,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) -# ensure -fcf-protection is disabled when using retpoline as it is -# incompatible with -mindirect-branch=thunk-extern -ifdef CONFIG_RETPOLINE -KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) -endif - # include additional Makefiles when needed include-y := scripts/Makefile.extrawarn include-$(CONFIG_KASAN) += scripts/Makefile.kasan diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 3a5cfb0ddb20..c87066d6c995 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -326,9 +326,6 @@ clocks = <&xtal_32k>, <&xtal>; clock-names = "xtal_32k", "xtal"; - - assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>; - assigned-clock-rates = <208000000>; }; }; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index c8745bc800f7..7b8c18e6605e 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -114,7 +114,7 @@ gpio-sck = <&gpio1 12 GPIO_ACTIVE_HIGH>; gpio-miso = <&gpio1 18 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio1 20 GPIO_ACTIVE_HIGH>; - cs-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>; num-chipselects = <1>; /* lcd panel */ @@ -124,7 +124,6 @@ spi-max-frequency = <100000>; spi-cpol; spi-cpha; - spi-cs-high; backlight= <&backlight>; label = "lcd"; diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 3ea4c5b9fd31..e833c21f1c01 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -16,8 +16,13 @@ debounce-interval = <10>; }; + /* + * We use pad 0x4a100116 abe_dmic_din3.gpio_122 as the irq instead + * of the gpio interrupt to avoid lost events 
in deeper idle states. + */ slider { label = "Keypad Slide"; + interrupts-extended = <&omap4_pmx_core 0xd6>; gpios = <&gpio4 26 GPIO_ACTIVE_HIGH>; /* gpio122 */ linux,input-type = <EV_SW>; linux,code = <SW_KEYPAD_SLIDE>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi index 62ab23824a3e..5088dd3a301b 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-drc02.dtsi @@ -33,9 +33,9 @@ * during TX anyway and that it only controls drive enable DE * line. Hence, the RX is always enabled here. */ - rs485-rx-en { + rs485-rx-en-hog { gpio-hog; - gpios = <8 GPIO_ACTIVE_HIGH>; + gpios = <8 0>; output-low; line-name = "rs485-rx-en"; }; @@ -61,9 +61,9 @@ * order to reset the Hub when USB bus is powered down, but * so far there is no such functionality. */ - usb-hub { + usb-hub-hog { gpio-hog; - gpios = <2 GPIO_ACTIVE_HIGH>; + gpios = <2 0>; output-high; line-name = "usb-hub-reset"; }; @@ -87,6 +87,12 @@ }; }; +&i2c4 { + touchscreen@49 { + status = "disabled"; + }; +}; + &i2c5 { /* TP7/TP8 */ pinctrl-names = "default"; pinctrl-0 = <&i2c5_pins_a>; @@ -104,7 +110,7 @@ * are used for on-board microSD slot instead. */ /delete-property/broken-cd; - cd-gpios = <&gpioi 10 (GPIO_ACTIVE_LOW | GPIO_PULL_UP)>; + cd-gpios = <&gpioi 10 GPIO_ACTIVE_HIGH>; disable-wp; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-picoitx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-picoitx.dtsi index 356150d28c42..32700cca24c8 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-picoitx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-picoitx.dtsi @@ -43,9 +43,9 @@ * in order to turn on port power when USB bus is powered up, but so * far there is no such functionality. 
*/ - usb-port-power { + usb-port-power-hog { gpio-hog; - gpios = <13 GPIO_ACTIVE_LOW>; + gpios = <13 0>; output-low; line-name = "usb-port-power"; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index ac46ab363e1b..daff5318f301 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -390,7 +390,8 @@ pinctrl-0 = <&sdmmc1_b4_pins_a &sdmmc1_dir_pins_a>; pinctrl-1 = <&sdmmc1_b4_od_pins_a &sdmmc1_dir_pins_a>; pinctrl-2 = <&sdmmc1_b4_sleep_pins_a &sdmmc1_dir_sleep_pins_a>; - broken-cd; + cd-gpios = <&gpiog 1 (GPIO_ACTIVE_LOW | GPIO_PULL_UP)>; + disable-wp; st,sig-dir; st,neg-edge; st,use-ckin; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapro.dts b/arch/arm/boot/dts/sun7i-a20-bananapro.dts index 01ccff756996..5740f9442705 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapro.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapro.dts @@ -110,7 +110,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <&reg_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/include/asm/kexec-internal.h b/arch/arm/include/asm/kexec-internal.h new file mode 100644 index 000000000000..ecc2322db7aa --- /dev/null +++ b/arch/arm/include/asm/kexec-internal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM_KEXEC_INTERNAL_H +#define _ARM_KEXEC_INTERNAL_H + +struct kexec_relocate_data { + unsigned long kexec_start_address; + unsigned long kexec_indirection_page; + unsigned long kexec_mach_type; + unsigned long kexec_r2; +}; + +#endif diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S index 98daa7f48314..7454480d084b 100644 --- a/arch/arm/include/debug/tegra.S +++ b/arch/arm/include/debug/tegra.S @@ -149,7 +149,34 @@ .align 99: .word . +#if defined(ZIMAGE) + .word . + 4 +/* + * Storage for the state maintained by the macro. 
+ * + * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. + * That's because this header is included from multiple files, and we only + * want a single copy of the data. In particular, the UART probing code above + * assumes it's running using physical addresses. This is true when this file + * is included from head.o, but not when included from debug.o. So we need + * to share the probe results between the two copies, rather than having + * to re-run the probing again later. + * + * In the decompressor, we put the storage right here, since common.c + * isn't included in the decompressor build. This storage data gets put in + * .text even though it's really data, since .data is discarded from the + * decompressor. Luckily, .text is writeable in the decompressor, unless + * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. + */ + /* Debug UART initialization required */ + .word 1 + /* Debug UART physical address */ + .word 0 + /* Debug UART virtual address */ + .word 0 +#else .word tegra_uart_config +#endif .ltorg /* Load previously selected UART address */ @@ -189,30 +216,3 @@ .macro waituarttxrdy,rd,rx .endm - -/* - * Storage for the state maintained by the macros above. - * - * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. - * That's because this header is included from multiple files, and we only - * want a single copy of the data. In particular, the UART probing code above - * assumes it's running using physical addresses. This is true when this file - * is included from head.o, but not when included from debug.o. So we need - * to share the probe results between the two copies, rather than having - * to re-run the probing again later. - * - * In the decompressor, we put the symbol/storage right here, since common.c - * isn't included in the decompressor build. This symbol gets put in .text - * even though it's really data, since .data is discarded from the - * decompressor. 
Luckily, .text is writeable in the decompressor, unless - * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. - */ -#if defined(ZIMAGE) -tegra_uart_config: - /* Debug UART initialization required */ - .word 1 - /* Debug UART physical address */ - .word 0 - /* Debug UART virtual address */ - .word 0 -#endif diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a1570c8bab25..be8050b0c3df 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <asm/cacheflush.h> +#include <asm/kexec-internal.h> #include <asm/glue-df.h> #include <asm/glue-pf.h> #include <asm/mach/arch.h> @@ -170,5 +171,9 @@ int main(void) DEFINE(MPU_RGN_PRBAR, offsetof(struct mpu_rgn, prbar)); DEFINE(MPU_RGN_PRLAR, offsetof(struct mpu_rgn, prlar)); #endif + DEFINE(KEXEC_START_ADDR, offsetof(struct kexec_relocate_data, kexec_start_address)); + DEFINE(KEXEC_INDIR_PAGE, offsetof(struct kexec_relocate_data, kexec_indirection_page)); + DEFINE(KEXEC_MACH_TYPE, offsetof(struct kexec_relocate_data, kexec_mach_type)); + DEFINE(KEXEC_R2, offsetof(struct kexec_relocate_data, kexec_r2)); return 0; } diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 5d84ad333f05..2b09dad7935e 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include <linux/of_fdt.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> +#include <asm/kexec-internal.h> #include <asm/fncpy.h> #include <asm/mach-types.h> #include <asm/smp_plat.h> @@ -22,11 +23,6 @@ extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; -extern unsigned long kexec_start_address; -extern unsigned long kexec_indirection_page; -extern unsigned long kexec_mach_type; -extern unsigned long kexec_boot_atags; - static atomic_t waiting_for_crash_ipi; /* @@ -159,6 +155,7 @@ void (*kexec_reinit)(void); void 
machine_kexec(struct kimage *image) { unsigned long page_list, reboot_entry_phys; + struct kexec_relocate_data *data; void (*reboot_entry)(void); void *reboot_code_buffer; @@ -174,18 +171,17 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); - /* Prepare parameters for reboot_code_buffer*/ - set_kernel_text_rw(); - kexec_start_address = image->start; - kexec_indirection_page = page_list; - kexec_mach_type = machine_arch_type; - kexec_boot_atags = image->arch.kernel_r2; - /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, &relocate_new_kernel, relocate_new_kernel_size); + data = reboot_code_buffer + relocate_new_kernel_size; + data->kexec_start_address = image->start; + data->kexec_indirection_page = page_list; + data->kexec_mach_type = machine_arch_type; + data->kexec_r2 = image->arch.kernel_r2; + /* get the identity mapping physical address for the reboot code */ reboot_entry_phys = virt_to_idmap(reboot_entry); diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 72a08786e16e..218d524360fc 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -5,14 +5,16 @@ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/asm-offsets.h> #include <asm/kexec.h> .align 3 /* not needed for this code, but keeps fncpy() happy */ ENTRY(relocate_new_kernel) - ldr r0,kexec_indirection_page - ldr r1,kexec_start_address + adr r7, relocate_new_kernel_end + ldr r0, [r7, #KEXEC_INDIR_PAGE] + ldr r1, [r7, #KEXEC_START_ADDR] /* * If there is no indirection page (we are doing crashdumps) @@ -57,34 +59,16 @@ ENTRY(relocate_new_kernel) 2: /* Jump to relocated kernel */ - mov lr,r1 - mov r0,#0 - ldr r1,kexec_mach_type - ldr r2,kexec_boot_atags - ARM( ret lr ) - THUMB( bx lr ) - - .align - - .globl kexec_start_address -kexec_start_address: - .long 0x0 - - .globl kexec_indirection_page 
-kexec_indirection_page: - .long 0x0 - - .globl kexec_mach_type -kexec_mach_type: - .long 0x0 - - /* phy addr of the atags for the new kernel */ - .globl kexec_boot_atags -kexec_boot_atags: - .long 0x0 + mov lr, r1 + mov r0, #0 + ldr r1, [r7, #KEXEC_MACH_TYPE] + ldr r2, [r7, #KEXEC_R2] + ARM( ret lr ) + THUMB( bx lr ) ENDPROC(relocate_new_kernel) + .align 3 relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 9d2e916121be..a3a38d0a4c85 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -693,18 +693,20 @@ struct page *get_signal_page(void) addr = page_address(page); + /* Poison the entire page */ + memset32(addr, __opcode_to_mem_arm(0xe7fddef1), + PAGE_SIZE / sizeof(u32)); + /* Give the signal return code some randomness */ offset = 0x200 + (get_random_int() & 0x7fc); signal_return_offset = offset; - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. 
- */ + /* Copy signal return handlers into the page */ memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); - ptr = (unsigned long)addr + offset; - flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + /* Flush out all instructions in this page */ + ptr = (unsigned long)addr; + flush_icache_range(ptr, ptr + PAGE_SIZE); return page; } diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index 416462e3f5d6..f9713dc561cf 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -65,15 +65,15 @@ dc21285_read_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("ldrb %0, [%1, %2]" + asm volatile("ldrb %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 2: - asm("ldrh %0, [%1, %2]" + asm volatile("ldrh %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 4: - asm("ldr %0, [%1, %2]" + asm volatile("ldr %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; } @@ -99,17 +99,17 @@ dc21285_write_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("strb %0, [%1, %2]" + asm volatile("strb %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 2: - asm("strh %0, [%1, %2]" + asm volatile("strh %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 4: - asm("str %0, [%1, %2]" + asm volatile("str %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index a720259099ed..0a4c9b0b13b0 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -203,6 +203,8 @@ static int osk_tps_setup(struct i2c_client *client, void *context) */ gpio_request(OSK_TPS_GPIO_USB_PWR_EN, "n_vbus_en"); gpio_direction_output(OSK_TPS_GPIO_USB_PWR_EN, 1); + /* Free the GPIO again as the driver will request it */ + 
gpio_free(OSK_TPS_GPIO_USB_PWR_EN); /* Set GPIO 2 high so LED D3 is off by default */ tps65010_set_gpio_out_value(GPIO2, HIGH); diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 4a59c169a113..4178c0ee46eb 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -17,11 +17,10 @@ config ARCH_OMAP3 bool "TI OMAP3" depends on ARCH_MULTI_V7 select ARCH_OMAP2PLUS - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND select OMAP_HWMOD select OMAP_INTERCONNECT - select PM_OPP if PM - select PM if CPU_IDLE + select PM_OPP select SOC_HAS_OMAP2_SDRC select ARM_ERRATA_430973 @@ -30,7 +29,7 @@ config ARCH_OMAP4 depends on ARCH_MULTI_V7 select ARCH_OMAP2PLUS select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND select ARM_ERRATA_720789 select ARM_GIC select HAVE_ARM_SCU if SMP @@ -40,7 +39,7 @@ config ARCH_OMAP4 select OMAP_INTERCONNECT_BARRIER select PL310_ERRATA_588369 if CACHE_L2X0 select PL310_ERRATA_727915 if CACHE_L2X0 - select PM_OPP if PM + select PM_OPP select PM if CPU_IDLE select ARM_ERRATA_754322 select ARM_ERRATA_775420 @@ -50,7 +49,7 @@ config SOC_OMAP5 bool "TI OMAP5" depends on ARCH_MULTI_V7 select ARCH_OMAP2PLUS - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND select ARM_GIC select HAVE_ARM_SCU if SMP select HAVE_ARM_ARCH_TIMER @@ -58,14 +57,14 @@ config SOC_OMAP5 select OMAP_HWMOD select OMAP_INTERCONNECT select OMAP_INTERCONNECT_BARRIER - select PM_OPP if PM + select PM_OPP select ZONE_DMA if ARM_LPAE config SOC_AM33XX bool "TI AM33XX" depends on ARCH_MULTI_V7 select ARCH_OMAP2PLUS - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND config SOC_AM43XX bool "TI AM43x" @@ -79,13 +78,13 @@ config SOC_AM43XX select ARM_ERRATA_754322 select ARM_ERRATA_775420 select OMAP_INTERCONNECT - select ARM_CPU_SUSPEND if PM + select ARM_CPU_SUSPEND config SOC_DRA7XX bool "TI DRA7XX" depends on ARCH_MULTI_V7 select ARCH_OMAP2PLUS - select ARM_CPU_SUSPEND if PM + select 
ARM_CPU_SUSPEND select ARM_GIC select HAVE_ARM_SCU if SMP select HAVE_ARM_ARCH_TIMER @@ -94,7 +93,7 @@ config SOC_DRA7XX select OMAP_HWMOD select OMAP_INTERCONNECT select OMAP_INTERCONNECT_BARRIER - select PM_OPP if PM + select PM_OPP select ZONE_DMA if ARM_LPAE select PINCTRL_TI_IODELAY if OF && PINCTRL @@ -112,9 +111,11 @@ config ARCH_OMAP2PLUS select OMAP_DM_TIMER select OMAP_GPMC select PINCTRL - select PM_GENERIC_DOMAINS if PM - select PM_GENERIC_DOMAINS_OF if PM + select PM + select PM_GENERIC_DOMAINS + select PM_GENERIC_DOMAINS_OF select RESET_CONTROLLER + select SIMPLE_PM_BUS select SOC_BUS select TI_SYSC select OMAP_IRQCHIP @@ -140,7 +141,6 @@ config ARCH_OMAP2PLUS_TYPICAL select I2C_OMAP select MENELAUS if ARCH_OMAP2 select NEON if CPU_V7 - select PM select REGULATOR select REGULATOR_FIXED_VOLTAGE select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4 diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index c8d317fafe2e..de37027ad758 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -151,10 +151,10 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, (cx->mpu_logic_state == PWRDM_POWER_OFF); /* Enter broadcast mode for periodic timers */ - tick_broadcast_enable(); + RCU_NONIDLE(tick_broadcast_enable()); /* Enter broadcast mode for one-shot timers */ - tick_broadcast_enter(); + RCU_NONIDLE(tick_broadcast_enter()); /* * Call idle CPU PM enter notifier chain so that @@ -166,7 +166,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0) { pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); /* * Call idle CPU cluster PM enter notifier chain @@ -178,7 +178,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, index = 0; cx = state_ptr + index; pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, 
cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); mpuss_can_lose_context = 0; } } @@ -194,9 +194,9 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context) gic_dist_disable(); - clkdm_deny_idle(cpu_clkdm[1]); - omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON); - clkdm_allow_idle(cpu_clkdm[1]); + RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1])); + RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON)); + RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1])); if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && mpuss_can_lose_context) { @@ -222,7 +222,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, cpu_pm_exit(); cpu_pm_out: - tick_broadcast_exit(); + RCU_NONIDLE(tick_broadcast_exit()); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index cd38bf07c094..2e3a10914c40 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -522,6 +522,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { &dra7_ipu1_dsp_iommu_pdata), #endif /* Common auxdata */ + OF_DEV_AUXDATA("simple-pm-bus", 0, NULL, omap_auxdata_lookup), OF_DEV_AUXDATA("ti,sysc", 0, NULL, &ti_sysc_pdata), OF_DEV_AUXDATA("pinctrl-single", 0, NULL, &pcs_pdata), OF_DEV_AUXDATA("ti,omap-prm-inst", 0, NULL, &ti_prm_pdata), diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index ba1c6dfdc4b6..d945c84ab697 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -280,8 +280,6 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; power-domains = <&pwrc PWRC_AXG_ETHERNET_MEM_ID>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 
9c90d562ada1..b858c5e43cc8 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -224,8 +224,6 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; status = "disabled"; mdio0: mdio { @@ -2390,7 +2388,7 @@ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; dr_mode = "host"; snps,dis_u2_susphy_quirk; - snps,quirk-frame-length-adjustment; + snps,quirk-frame-length-adjustment = <0x20>; snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 726b91d3a905..0edd137151f8 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -13,7 +13,6 @@ #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/power/meson-gxbb-power.h> -#include <dt-bindings/reset/amlogic,meson-gxbb-reset.h> #include <dt-bindings/thermal/thermal.h> / { @@ -576,8 +575,6 @@ interrupt-names = "macirq"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; power-domains = <&pwrc PWRC_GXBB_ETHERNET_MEM_ID>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-c4.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-c4.dts index cf5a98f0e47c..a712273c905a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-c4.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid-c4.dts @@ -52,7 +52,7 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - gpio = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_HIGH>; + gpio = <&gpio_ao GPIOAO_3 GPIO_OPEN_DRAIN>; enable-active-high; regulator-always-on; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 025e1f587662..565934cbfa28 100644 --- 
a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -385,7 +385,7 @@ dcfg: dcfg@1ee0000 { compatible = "fsl,ls1046a-dcfg", "syscon"; - reg = <0x0 0x1ee0000 0x0 0x10000>; + reg = <0x0 0x1ee0000 0x0 0x1000>; big-endian; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi index 994a2fce449a..7c329e173a6d 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp11x.dtsi @@ -59,7 +59,7 @@ CP11X_LABEL(ethernet): ethernet@0 { compatible = "marvell,armada-7k-pp22"; - reg = <0x0 0x100000>, <0x129000 0xb000>; + reg = <0x0 0x100000>, <0x129000 0xb000>, <0x220000 0x800>; clocks = <&CP11X_LABEL(clk) 1 3>, <&CP11X_LABEL(clk) 1 9>, <&CP11X_LABEL(clk) 1 5>, <&CP11X_LABEL(clk) 1 6>, <&CP11X_LABEL(clk) 1 18>; diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index 7cc236575ee2..c0b93813ea9a 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -415,7 +415,9 @@ &gcc { protected-clocks = <GCC_QSPI_CORE_CLK>, <GCC_QSPI_CORE_CLK_SRC>, - <GCC_QSPI_CNOC_PERIPH_AHB_CLK>; + <GCC_QSPI_CNOC_PERIPH_AHB_CLK>, + <GCC_LPASS_Q6_AXI_CLK>, + <GCC_LPASS_SWAY_CLK>; }; &gpu { diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index 13fdd02cffe6..8b40f96e9780 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -302,7 +302,9 @@ &gcc { protected-clocks = <GCC_QSPI_CORE_CLK>, <GCC_QSPI_CORE_CLK_SRC>, - <GCC_QSPI_CNOC_PERIPH_AHB_CLK>; + <GCC_QSPI_CNOC_PERIPH_AHB_CLK>, + <GCC_LPASS_Q6_AXI_CLK>, + <GCC_LPASS_SWAY_CLK>; }; &gpu { @@ -320,6 +322,8 @@ &i2c3 { status = "okay"; clock-frequency = <400000>; + /* Overwrite pinctrl-0 from sdm845.dtsi */ + pinctrl-0 = <&qup_i2c3_default &i2c3_hid_active>; tsel: hid@15 { 
compatible = "hid-over-i2c"; @@ -327,9 +331,6 @@ hid-descr-addr = <0x1>; interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; - - pinctrl-names = "default"; - pinctrl-0 = <&i2c3_hid_active>; }; tsc2: hid@2c { @@ -338,11 +339,6 @@ hid-descr-addr = <0x20>; interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; - - pinctrl-names = "default"; - pinctrl-0 = <&i2c3_hid_active>; - - status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 2695ea8cda14..64193292d26c 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -1097,7 +1097,7 @@ vopl_mmu: iommu@ff470f00 { compatible = "rockchip,iommu"; reg = <0x0 0xff470f00 0x0 0x100>; - interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "vopl_mmu"; clocks = <&cru ACLK_VOPL>, <&cru HCLK_VOPL>; clock-names = "aclk", "iface"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts index 2ee07d15a6e3..1eecad724f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts @@ -114,6 +114,10 @@ cpu-supply = <&vdd_arm>; }; +&display_subsystem { + status = "disabled"; +}; + &gmac2io { assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clk>, <&gmac_clk>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 06d48338c836..219b7507a10f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -790,7 +790,6 @@ &pcie0 { bus-scan-delay-ms = <1000>; ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>; - max-link-speed = <2>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi 
b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index f5dee5f447bb..2551b238b97c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -234,6 +234,7 @@ reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>; reg-names = "axi-base", "apb-base"; + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; @@ -252,7 +253,6 @@ <0 0 0 2 &pcie0_intc 1>, <0 0 0 3 &pcie0_intc 2>, <0 0 0 4 &pcie0_intc 3>; - linux,pci-domain = <0>; max-link-speed = <1>; msi-map = <0x0 &its 0x0 0x1000>; phys = <&pcie_phy 0>, <&pcie_phy 1>, @@ -1278,7 +1278,6 @@ compatible = "rockchip,rk3399-vdec"; reg = <0x0 0xff660000 0x0 0x400>; interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH 0>; - interrupt-names = "vdpu"; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; clock-names = "axi", "ahb", "cabac", "core"; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index a0bcf0201261..01aa3eee90e8 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1076,7 +1076,7 @@ CONFIG_INTERCONNECT=y CONFIG_INTERCONNECT_QCOM=y CONFIG_INTERCONNECT_QCOM_MSM8916=m CONFIG_INTERCONNECT_QCOM_OSM_L3=m -CONFIG_INTERCONNECT_QCOM_SDM845=m +CONFIG_INTERCONNECT_QCOM_SDM845=y CONFIG_INTERCONNECT_QCOM_SM8150=m CONFIG_INTERCONNECT_QCOM_SM8250=m CONFIG_EXT2_FS=y diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 99d7e1494aaa..ff4732785c32 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -251,9 +251,9 @@ static inline const void *__tag_set(const void *addr, u8 tag) * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the * kernel's TTBR1 address range. 
*/ -#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) +#define __is_lm_address(addr) (((u64)(addr) - PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) -#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) +#define __lm_to_phys(addr) (((addr) - PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ @@ -332,7 +332,7 @@ static inline void *phys_to_virt(phys_addr_t x) #endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */ #define virt_addr_valid(addr) ({ \ - __typeof__(addr) __addr = addr; \ + __typeof__(addr) __addr = __tag_reset(addr); \ __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 31b060a44045..b17bf19217f1 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -47,6 +47,8 @@ __invalid: b . /* + * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers. + * * x0: SMCCC function ID * x1: struct kvm_nvhe_init_params PA */ @@ -70,9 +72,9 @@ __do_hyp_init: eret 1: mov x0, x1 - mov x4, lr - bl ___kvm_hyp_init - mov lr, x4 + mov x3, lr + bl ___kvm_hyp_init // Clobbers x0..x2 + mov lr, x3 /* Hello, World! */ mov x0, #SMCCC_RET_SUCCESS @@ -82,8 +84,8 @@ SYM_CODE_END(__kvm_hyp_init) /* * Initialize the hypervisor in EL2. * - * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers - * and leave x4 for the caller. + * Only uses x0..x2 so as to not clobber callee-saved SMCCC registers + * and leave x3 for the caller. * * x0: struct kvm_nvhe_init_params PA */ @@ -112,9 +114,9 @@ alternative_else_nop_endif /* * Set the PS bits in TCR_EL2. 
*/ - ldr x1, [x0, #NVHE_INIT_TCR_EL2] - tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3 - msr tcr_el2, x1 + ldr x0, [x0, #NVHE_INIT_TCR_EL2] + tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 + msr tcr_el2, x0 isb @@ -193,7 +195,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) /* Enable MMU, set vectors and stack. */ mov x0, x28 - bl ___kvm_hyp_init // Clobbers x0..x3 + bl ___kvm_hyp_init // Clobbers x0..x2 /* Leave idmap. */ mov x0, x29 diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c index 67a9ba9eaa96..cde44c13dda1 100644 --- a/arch/arm64/mm/physaddr.c +++ b/arch/arm64/mm/physaddr.c @@ -9,7 +9,7 @@ phys_addr_t __virt_to_phys(unsigned long x) { - WARN(!__is_lm_address(x), + WARN(!__is_lm_address(__tag_reset(x)), "virt_to_phys used for non-linear address: %pK (%pS)\n", (void *)x, (void *)x); diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 703b1c4f6d12..45d5368d6a99 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -69,7 +69,7 @@ vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) unwcheck: vmlinux - -$(Q)READELF=$(READELF) $(PYTHON) $(srctree)/arch/ia64/scripts/unwcheck.py $< + -$(Q)READELF=$(READELF) $(PYTHON3) $(srctree)/arch/ia64/scripts/unwcheck.py $< archclean: diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index bfd1b671e35f..9581742f0db2 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # Usage: unwcheck.py FILE diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe2ef598e2ea..79ee7750937d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -51,7 +51,7 @@ obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o syscall_64.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o -obj-$(CONFIG_VDSO32) += vdso32/ +obj-$(CONFIG_VDSO32) += vdso32_wrapper.o obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o 
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o @@ -60,7 +60,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o -obj-$(CONFIG_PPC64) += vdso64/ +obj-$(CONFIG_PPC64) += vdso64_wrapper.o obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o procfs-y := proc_powerpc.o diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 9cb6f524854b..7d9a6fee0e3d 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CC32FLAGS += -m32 KBUILD_CFLAGS := $(filter-out -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n @@ -46,9 +46,6 @@ obj-y += vdso32_wrapper.o targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg - # link rule for the .so file, .lds has to be first $(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso32ld_and_check) diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 3f5ef035b0a9..3f5ef035b0a9 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index bf363ff37152..2813e3f98db6 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -17,7 +17,7 @@ endif # Build rules -targets := $(obj-vdso64) vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg vgettimeofday.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) 
GCOV_PROFILE := n @@ -29,15 +29,9 @@ ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s -obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -$(obj)/vgettimeofday.o: %.o: %.c FORCE - -# Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg - # link rule for the .so file, .lds has to be first $(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE $(call if_changed,vdso64ld_and_check) diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index bbf68cd01088..2d4067561293 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -15,11 +15,20 @@ .text +/* + * __kernel_start_sigtramp_rt64 and __kernel_sigtramp_rt64 together + * are one function split in two parts. The kernel jumps to the former + * and the signal handler indirectly (by blr) returns to the latter. + * __kernel_sigtramp_rt64 needs to point to the return address so + * glibc can correctly identify the trampoline stack frame. + */ .balign 8 .balign IFETCH_ALIGN_BYTES -V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) +V_FUNCTION_BEGIN(__kernel_start_sigtramp_rt64) .Lsigrt_start: bctrl /* call the handler */ +V_FUNCTION_END(__kernel_start_sigtramp_rt64) +V_FUNCTION_BEGIN(__kernel_sigtramp_rt64) addi r1, r1, __SIGNAL_FRAMESIZE li r0,__NR_rt_sigreturn sc diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 6164d1a1ba11..2f3c359cacd3 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -131,4 +131,4 @@ VERSION /* * Make the sigreturn code visible to the kernel. 
*/ -VDSO_sigtramp_rt64 = __kernel_sigtramp_rt64; +VDSO_sigtramp_rt64 = __kernel_start_sigtramp_rt64; diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 1d56d81fe3b3..1d56d81fe3b3 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bf7a7d62ae8b..ede093e96234 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -818,13 +818,15 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, break; if (rev) { /* reverse 32 bytes */ - buf.d[0] = byterev_8(reg->d[3]); - buf.d[1] = byterev_8(reg->d[2]); - buf.d[2] = byterev_8(reg->d[1]); - buf.d[3] = byterev_8(reg->d[0]); - reg = &buf; + union vsx_reg buf32[2]; + buf32[0].d[0] = byterev_8(reg[1].d[1]); + buf32[0].d[1] = byterev_8(reg[1].d[0]); + buf32[1].d[0] = byterev_8(reg[0].d[1]); + buf32[1].d[1] = byterev_8(reg[0].d[0]); + memcpy(mem, buf32, size); + } else { + memcpy(mem, reg, size); } - memcpy(mem, reg, size); break; case 16: /* stxv, stxvx, stxvl, stxvll */ diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e9e2c1f0a690..e0a34eb5ed3b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -252,8 +252,10 @@ choice default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY config MAXPHYSMEM_1GB + depends on 32BIT bool "1GiB" config MAXPHYSMEM_2GB + depends on 64BIT && CMODEL_MEDLOW bool "2GiB" config MAXPHYSMEM_128GB depends on 64BIT && CMODEL_MEDANY diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 2d50f76efe48..64a675c5c30a 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -135,7 +135,10 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + (unsigned long)(_addr) >= 
PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \ +}) #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index 211eb8244a45..8b80c80c7f1a 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -32,14 +32,14 @@ bool kernel_page_present(struct page *page); #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_ARCH_HAS_STRICT_KERNEL_RWX +#ifdef CONFIG_STRICT_KERNEL_RWX #ifdef CONFIG_64BIT #define SECTION_ALIGN (1 << 21) #else #define SECTION_ALIGN (1 << 22) #endif -#else /* !CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */ +#else /* !CONFIG_STRICT_KERNEL_RWX */ #define SECTION_ALIGN L1_CACHE_BYTES -#endif /* CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */ +#endif /* CONFIG_STRICT_KERNEL_RWX */ #endif /* _ASM_RISCV_SET_MEMORY_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 3fa3f26dde85..c7c0655dd45b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -293,6 +293,8 @@ void free_initmem(void) unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; - set_memory_rw_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + set_memory_rw_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); + free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 34d302d1a07f..c3030db3325f 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -15,7 +15,6 @@ config UML select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE select NO_DMA - select ARCH_HAS_SET_MEMORY select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select HAVE_GCC_PLUGINS diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 13b1fe694b90..8e0b43cf089f 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -375,11 +375,11 @@ break_loop: file = NULL; backing_file = strsep(&str, ",:"); - if (*backing_file == 
'\0') + if (backing_file && *backing_file == '\0') backing_file = NULL; serial = strsep(&str, ",:"); - if (*serial == '\0') + if (serial && *serial == '\0') serial = NULL; if (backing_file && ubd_dev->no_cow) { @@ -1241,7 +1241,7 @@ static int __init ubd_driver_init(void){ /* Letting ubd=sync be like using ubd#s= instead of ubd#= is * enough. So use anyway the io thread. */ } - stack = alloc_stack(0); + stack = alloc_stack(0, 0); io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), &thread_fd); if(io_pid < 0){ diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 27e92d3881ff..5d957b7e7fd5 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1084,6 +1084,7 @@ static void virtio_uml_release_dev(struct device *d) } os_close_file(vu_dev->sock); + kfree(vu_dev); } /* Platform device */ @@ -1097,7 +1098,7 @@ static int virtio_uml_probe(struct platform_device *pdev) if (!pdata) return -EINVAL; - vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL); + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); if (!vu_dev) return -ENOMEM; diff --git a/arch/um/include/asm/io.h b/arch/um/include/asm/io.h index 96f77b5232aa..cef03e3aa0f9 100644 --- a/arch/um/include/asm/io.h +++ b/arch/um/include/asm/io.h @@ -5,7 +5,7 @@ #define ioremap ioremap static inline void __iomem *ioremap(phys_addr_t offset, size_t size) { - return (void __iomem *)(unsigned long)offset; + return NULL; } #define iounmap iounmap diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 39376bb63abf..def376194dce 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -55,15 +55,12 @@ extern unsigned long end_iomem; #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) #define __PAGE_KERNEL_EXEC \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -#define __PAGE_KERNEL_RO \ - (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | 
_PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) /* * The i386 can't do page protection for execute, and considers that the same diff --git a/arch/um/include/asm/set_memory.h b/arch/um/include/asm/set_memory.h deleted file mode 100644 index 24266c63720d..000000000000 --- a/arch/um/include/asm/set_memory.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/set_memory.h> diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h index d8c279e3312f..2888ec812f6e 100644 --- a/arch/um/include/shared/kern_util.h +++ b/arch/um/include/shared/kern_util.h @@ -19,7 +19,7 @@ extern int kmalloc_ok; #define UML_ROUND_UP(addr) \ ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK) -extern unsigned long alloc_stack(int atomic); +extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); struct pt_regs; diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index e4abac6c9727..6516ef1f8274 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/kmsg_dump.h> #include <linux/console.h> +#include <linux/string.h> #include <shared/init.h> #include <shared/kern.h> #include <os.h> @@ -16,8 +17,12 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, if (!console_trylock()) return; - for_each_console(con) - break; + for_each_console(con) { + if(strcmp(con->name, "tty") == 0 && + (con->flags & (CON_ENABLED | CON_CONSDEV)) != 0) { + break; + } + } console_unlock(); diff --git a/arch/um/kernel/process.c 
b/arch/um/kernel/process.c index 2a986ece5478..81d508daf67c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,7 +32,6 @@ #include <os.h> #include <skas.h> #include <linux/time-internal.h> -#include <asm/set_memory.h> /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -63,18 +62,16 @@ void free_stack(unsigned long stack, int order) free_pages(stack, order); } -unsigned long alloc_stack(int atomic) +unsigned long alloc_stack(int order, int atomic) { - unsigned long addr; + unsigned long page; gfp_t flags = GFP_KERNEL; if (atomic) flags = GFP_ATOMIC; - addr = __get_free_pages(flags, 1); + page = __get_free_pages(flags, order); - set_memory_ro(addr, 1); - - return addr + PAGE_SIZE; + return page; } static inline void set_current(struct task_struct *task) diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index f4db89b5b5a6..315248b03941 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -535,6 +535,31 @@ invalid_number: return 1; } + +static void time_travel_set_start(void) +{ + if (time_travel_start_set) + return; + + switch (time_travel_mode) { + case TT_MODE_EXTERNAL: + time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); + /* controller gave us the *current* time, so adjust by that */ + time_travel_ext_get_time(); + time_travel_start -= time_travel_time; + break; + case TT_MODE_INFCPU: + case TT_MODE_BASIC: + if (!time_travel_start_set) + time_travel_start = os_persistent_clock_emulation(); + break; + case TT_MODE_OFF: + /* we just read the host clock with os_persistent_clock_emulation() */ + break; + } + + time_travel_start_set = true; +} #else /* CONFIG_UML_TIME_TRAVEL_SUPPORT */ #define time_travel_start_set 0 #define time_travel_start 0 @@ -553,6 +578,10 @@ static void time_travel_set_interval(unsigned long long interval) { } +static inline void time_travel_set_start(void) +{ +} + /* fail link if this actually gets used */ extern u64 time_travel_ext_req(u32 op, u64 
time); @@ -731,6 +760,8 @@ void read_persistent_clock64(struct timespec64 *ts) { long long nsecs; + time_travel_set_start(); + if (time_travel_mode != TT_MODE_OFF) nsecs = time_travel_start + time_travel_time; else @@ -742,25 +773,6 @@ void read_persistent_clock64(struct timespec64 *ts) void __init time_init(void) { -#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT - switch (time_travel_mode) { - case TT_MODE_EXTERNAL: - time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1); - /* controller gave us the *current* time, so adjust by that */ - time_travel_ext_get_time(); - time_travel_start -= time_travel_time; - break; - case TT_MODE_INFCPU: - case TT_MODE_BASIC: - if (!time_travel_start_set) - time_travel_start = os_persistent_clock_emulation(); - break; - case TT_MODE_OFF: - /* we just read the host clock with os_persistent_clock_emulation() */ - break; - } -#endif - timer_set_signal_handler(); late_time_init = um_timer_setup; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 437d1f1cc5ec..61776790cd67 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -608,57 +608,3 @@ void force_flush_all(void) vma = vma->vm_next; } } - -struct page_change_data { - unsigned int set_mask, clear_mask; -}; - -static int change_page_range(pte_t *ptep, unsigned long addr, void *data) -{ - struct page_change_data *cdata = data; - pte_t pte = READ_ONCE(*ptep); - - pte_clear_bits(pte, cdata->clear_mask); - pte_set_bits(pte, cdata->set_mask); - - set_pte(ptep, pte); - return 0; -} - -static int change_memory(unsigned long start, unsigned long pages, - unsigned int set_mask, unsigned int clear_mask) -{ - unsigned long size = pages * PAGE_SIZE; - struct page_change_data data; - int ret; - - data.set_mask = set_mask; - data.clear_mask = clear_mask; - - ret = apply_to_page_range(&init_mm, start, size, change_page_range, - &data); - - flush_tlb_kernel_range(start, start + size); - - return ret; -} - -int set_memory_ro(unsigned long addr, int numpages) -{ - return 
change_memory(addr, numpages, 0, _PAGE_RW); -} - -int set_memory_rw(unsigned long addr, int numpages) -{ - return change_memory(addr, numpages, _PAGE_RW, 0); -} - -int set_memory_nx(unsigned long addr, int numpages) -{ - return -EOPNOTSUPP; -} - -int set_memory_x(unsigned long addr, int numpages) -{ - return -EOPNOTSUPP; -} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 31d356b1ffd8..80e2660782a0 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -26,7 +26,8 @@ #include <mem_user.h> #include <os.h> -#define DEFAULT_COMMAND_LINE "root=98:0" +#define DEFAULT_COMMAND_LINE_ROOT "root=98:0" +#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty" /* Changed in add_arg and setup_arch, which run before SMP is started */ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 }; @@ -109,7 +110,8 @@ unsigned long end_vm; int ncpus = 1; /* Set in early boot */ -static int have_root __initdata = 0; +static int have_root __initdata; +static int have_console __initdata; /* Set in uml_mem_setup and modified in linux_main */ long long physmem_size = 32 * 1024 * 1024; @@ -161,6 +163,17 @@ __uml_setup("debug", no_skas_debug_setup, " this flag is not needed to run gdb on UML in skas mode\n\n" ); +static int __init uml_console_setup(char *line, int *add) +{ + have_console = 1; + return 0; +} + +__uml_setup("console=", uml_console_setup, +"console=<preferred console>\n" +" Specify the preferred console output driver\n\n" +); + static int __init Usage(char *line, int *add) { const char **p; @@ -264,7 +277,10 @@ int __init linux_main(int argc, char **argv) add_arg(argv[i]); } if (have_root == 0) - add_arg(DEFAULT_COMMAND_LINE); + add_arg(DEFAULT_COMMAND_LINE_ROOT); + + if (have_console == 0) + add_arg(DEFAULT_COMMAND_LINE_CONSOLE); host_task_size = os_get_top_address(); /* diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index feb48d796e00..9fa6e4187d4f 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c 
@@ -45,7 +45,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) unsigned long stack, sp; int pid, fds[2], ret, n; - stack = alloc_stack(__cant_sleep()); + stack = alloc_stack(0, __cant_sleep()); if (stack == 0) return -ENOMEM; @@ -116,7 +116,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(__cant_sleep()); + stack = alloc_stack(0, __cant_sleep()); if (stack == 0) return -ENOMEM; diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index a61cbf73a179..6c5041c5560b 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -104,5 +104,18 @@ long long os_nsecs(void) */ void os_idle_sleep(void) { - pause(); + struct itimerspec its; + sigset_t set, old; + + /* block SIGALRM while we analyze the timer state */ + sigemptyset(&set); + sigaddset(&set, SIGALRM); + sigprocmask(SIG_BLOCK, &set, &old); + + /* check the timer, and if it'll fire then wait for it */ + timer_gettime(event_high_res_timer, &its); + if (its.it_value.tv_sec || its.it_value.tv_nsec) + sigsuspend(&old); + /* either way, restore the signal mask */ + sigprocmask(SIG_UNBLOCK, &set, NULL); } diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7116da3980be..5857917f83ee 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -120,6 +120,9 @@ else KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel + + # Intel CET isn't enabled in the kernel + KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif ifdef CONFIG_X86_X32 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 34cb3c159481..412b51e059c8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -197,16 +197,6 @@ static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. 
We need a serializing instruction or - * mfence for this. - */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7f828fe49797..4819d5e5a335 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -84,4 +84,22 @@ do { \ #include <asm-generic/barrier.h> +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + asm volatile("mfence; lfence" : : : "memory"); +} + #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 6fe54b2813c1..2b87b191b3b8 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -43,8 +43,6 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) } #define arch_check_user_regs arch_check_user_regs -#define ARCH_SYSCALL_EXIT_WORK (_TIF_SINGLESTEP) - static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, unsigned long ti_work) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6bd20c0de8bc..7f4c081f59f0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -41,6 +41,7 @@ #include <asm/perf_event.h> #include <asm/x86_init.h> #include <linux/atomic.h> +#include <asm/barrier.h> #include <asm/mpspec.h> #include <asm/i8259.h> #include 
<asm/proto.h> @@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; + /* This MSR is special and need a special fence: */ + weak_wrmsr_fence(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index df6adc5674c9..f4da9bb69a88 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); } @@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long flags; u32 dest; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 0e4e81971567..6bde05a86b4e 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } @@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); @@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which) { unsigned long cfg = __prepare_ICR(which, vector, 0); - x2apic_wrmsr_fence(); + /* x2apic MSRs are 
special and need a special fence: */ + weak_wrmsr_fence(); native_x2apic_icr_write(cfg, 0); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 59a1e3ce3f14..816fdbec795a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), {} }; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 03aa33b58165..668a4a6533d9 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) CPU_ENTRY_AREA_TOTAL_SIZE)) return true; + /* + * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU + * GSBASE value via __per_cpu_offset or pcpu_unit_offsets. + */ +#ifdef CONFIG_SMP + if (within_area(addr, end, (unsigned long)__per_cpu_offset, + sizeof(unsigned long) * nr_cpu_ids)) + return true; +#else + if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets, + sizeof(pcpu_unit_offsets))) + return true; +#endif + for_each_possible_cpu(cpu) { /* The original rw GDT is being used after load_direct_gdt() */ if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu), @@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) (unsigned long)&per_cpu(cpu_tlbstate, cpu), sizeof(struct tlb_state))) return true; + + /* + * When in guest (X86_FEATURE_HYPERVISOR), local_db_save() + * will read per-cpu cpu_dr7 before clear dr7 register. 
+ */ + if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu), + sizeof(cpu_dr7))) + return true; } return false; @@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args) struct perf_event *bp; unsigned long *dr6_p; unsigned long dr6; + bool bpx; /* The DR6 value is pointed by args->err */ dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; - /* If it's a single step, TRAP bits are random */ - if (dr6 & DR_STEP) - return NOTIFY_DONE; - /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; @@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args) if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + bp = this_cpu_read(bp_per_reg[i]); + if (!bp) + continue; + + bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE; + /* - * The counter may be concurrently released but that can only - * occur from a call_rcu() path. We can then safely fetch - * the breakpoint, use its callback, touch its counter - * while we are in an rcu_read_lock() path. + * TF and data breakpoints are traps and can be merged, however + * instruction breakpoints are faults and will be raised + * separately. + * + * However DR6 can indicate both TF and instruction + * breakpoints. In that case take TF as that has precedence and + * delay the instruction breakpoint for the next exception. */ - rcu_read_lock(); + if (bpx && (dr6 & DR_STEP)) + continue; - bp = this_cpu_read(bp_per_reg[i]); /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling */ (*dr6_p) &= ~(DR_TRAP0 << i); - /* - * bp can be NULL due to lazy debug register switching - * or due to concurrent perf counter removing. - */ - if (!bp) { - rcu_read_unlock(); - break; - } perf_bp_event(bp, args->regs); @@ -538,11 +558,10 @@ static int hw_breakpoint_handler(struct die_args *args) * Set up resume flag to avoid breakpoint recursion when * returning back to origin. 
*/ - if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) + if (bpx) args->regs->flags |= X86_EFLAGS_RF; - - rcu_read_unlock(); } + /* * Further processing in do_debug() is needed for a) user-space * breakpoints (to generate signals) and b) when the system has diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3412c4595efd..740f3bdb3f61 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -661,6 +661,17 @@ static void __init trim_platform_memory_ranges(void) static void __init trim_bios_range(void) { /* + * A special case is the first 4Kb of memory; + * This is a BIOS owned area, not kernel ram, but generally + * not listed as such in the E820 table. + * + * This typically reserves additional memory (64KiB by default) + * since some BIOSes are known to corrupt low memory. See the + * Kconfig help text for X86_RESERVE_LOW. + */ + e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + + /* * special case: Some BIOSes report the PC BIOS * area (640Kb -> 1Mb) as RAM even though it is not. * take them out. @@ -717,15 +728,6 @@ early_param("reservelow", parse_reservelow); static void __init trim_low_memory_range(void) { - /* - * A special case is the first 4Kb of memory; - * This is a BIOS owned area, not kernel ram, but generally - * not listed as such in the E820 table. - * - * This typically reserves additional memory (64KiB by default) - * since some BIOSes are known to corrupt low memory. See the - * Kconfig help text for X86_RESERVE_LOW. - */ memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 117e24fbfd8a..02813a7f3a7c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1833,6 +1833,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled) arch_max_freq_ratio = turbo_disabled ? 
SCHED_CAPACITY_SCALE : arch_turbo_freq_ratio; } +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); static bool turbo_disabled(void) { diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 60d2c3798ba2..0f3c307b37b3 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -127,12 +127,17 @@ static int enable_single_step(struct task_struct *child) regs->flags |= X86_EFLAGS_TF; /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also + * Always set TIF_SINGLESTEP. This will also * cause us to set TF when returning to user mode. */ set_tsk_thread_flag(child, TIF_SINGLESTEP); + /* + * Ensure that a trap is triggered once stepping out of a system + * call prior to executing any user instruction. + */ + set_task_syscall_work(child, SYSCALL_EXIT_TRAP); + oflags = regs->flags; /* Set TF on the kernel stack.. */ @@ -230,6 +235,7 @@ void user_disable_single_step(struct task_struct *child) /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); + clear_task_syscall_work(child, SYSCALL_EXIT_TRAP); /* But touch TF only if it was set by us.. 
*/ if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 13036cf0b912..38172ca627d3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -321,7 +321,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, if (cpuid->nent < vcpu->arch.cpuid_nent) goto out; r = -EFAULT; - if (copy_to_user(entries, &vcpu->arch.cpuid_entries, + if (copy_to_user(entries, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; return 0; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 56cae1ff9e3f..66a08322988f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : (u32)msr_data; + if (efer & EFER_LMA) + ctxt->mode = X86EMUL_MODE_PROT64; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 2ef8615f9dba..b56d604809b8 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1049,8 +1049,8 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) } /* - * Clear non-leaf entries (and free associated page tables) which could - * be replaced by large mappings, for GFNs within the slot. + * Clear leaf entries which could be replaced by large mappings, for + * GFNs within the slot. 
*/ static void zap_collapsible_spte_range(struct kvm *kvm, struct kvm_mmu_page *root, @@ -1062,7 +1062,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm, tdp_root_for_each_pte(iter, root, start, end) { if (!is_shadow_present_pte(iter.old_spte) || - is_last_spte(iter.old_spte, iter.level)) + !is_last_spte(iter.old_spte, iter.level)) continue; pfn = spte_to_pfn(iter.old_spte); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 7a605ad8254d..db30670dd8c4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -231,6 +231,7 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control) static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) { + struct kvm_vcpu *vcpu = &svm->vcpu; bool vmcb12_lma; if ((vmcb12->save.efer & EFER_SVME) == 0) @@ -244,18 +245,10 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12) vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG); - if (!vmcb12_lma) { - if (vmcb12->save.cr4 & X86_CR4_PAE) { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK) - return false; - } else { - if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK) - return false; - } - } else { + if (vmcb12_lma) { if (!(vmcb12->save.cr4 & X86_CR4_PAE) || !(vmcb12->save.cr0 & X86_CR0_PE) || - (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK)) + (vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits)) return false; } if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4)) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ac652bc476ae..48017fef1cd9 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -342,6 +342,8 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, unsigned long first, last; int ret; + lockdep_assert_held(&kvm->lock); + if (ulen == 0 || uaddr + ulen < uaddr) return ERR_PTR(-EINVAL); @@ -1119,12 +1121,20 @@ int svm_register_enc_region(struct kvm *kvm, if (!region) return -ENOMEM; + 
mutex_lock(&kvm->lock); region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1); if (IS_ERR(region->pages)) { ret = PTR_ERR(region->pages); + mutex_unlock(&kvm->lock); goto e_free; } + region->uaddr = range->addr; + region->size = range->size; + + list_add_tail(&region->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + /* * The guest may change the memory encryption attribute from C=0 -> C=1 * or vice versa for this memory range. Lets make sure caches are @@ -1133,13 +1143,6 @@ int svm_register_enc_region(struct kvm *kvm, */ sev_clflush_pages(region->pages, region->npages); - region->uaddr = range->addr; - region->size = range->size; - - mutex_lock(&kvm->lock); - list_add_tail(&region->list, &sev->regions_list); - mutex_unlock(&kvm->lock); - return ret; e_free: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f923e14e87df..3442d44ca53b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -454,6 +454,11 @@ static int has_svm(void) return 0; } + if (sev_active()) { + pr_info("KVM is unsupported when running as an SEV guest\n"); + return 0; + } + return 1; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 0fe874ae5498..6e7d070f8b86 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -403,9 +403,6 @@ static inline bool gif_set(struct vcpu_svm *svm) } /* svm.c */ -#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U -#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U -#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U #define MSR_INVALID 0xffffffffU extern int sev; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cc60b1fc3ee7..eb69fef57485 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6860,11 +6860,20 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) switch (index) { case MSR_IA32_TSX_CTRL: /* - * No need to pass TSX_CTRL_CPUID_CLEAR through, so - * let's avoid changing CPUID bits under the host - * kernel's feet. 
+ * TSX_CTRL_CPUID_CLEAR is handled in the CPUID + * interception. Keep the host value unchanged to avoid + * changing CPUID bits under the host kernel's feet. + * + * hle=0, rtm=0, tsx_ctrl=1 can be found with some + * combinations of new kernel and old userspace. If + * those guests run on a tsx=off host, do allow guests + * to use TSX_CTRL, but do not change the value on the + * host so that TSX remains always disabled. */ - vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + if (boot_cpu_has(X86_FEATURE_RTM)) + vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + else + vmx->guest_uret_msrs[j].mask = 0; break; default: vmx->guest_uret_msrs[j].mask = -1ull; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76bce832cade..1b404e4d7dd8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1394,16 +1394,24 @@ static u64 kvm_get_arch_capabilities(void) if (!boot_cpu_has_bug(X86_BUG_MDS)) data |= ARCH_CAP_MDS_NO; - /* - * On TAA affected systems: - * - nothing to do if TSX is disabled on the host. - * - we emulate TSX_CTRL if present on the host. - * This lets the guest use VERW to clear CPU buffers. - */ - if (!boot_cpu_has(X86_FEATURE_RTM)) - data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR); - else if (!boot_cpu_has_bug(X86_BUG_TAA)) + if (!boot_cpu_has(X86_FEATURE_RTM)) { + /* + * If RTM=0 because the kernel has disabled TSX, the host might + * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0 + * and therefore knows that there cannot be TAA) but keep + * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts, + * and we want to allow migrating those guests to tsx=off hosts. + */ + data &= ~ARCH_CAP_TAA_NO; + } else if (!boot_cpu_has_bug(X86_BUG_TAA)) { data |= ARCH_CAP_TAA_NO; + } else { + /* + * Nothing to do here; we emulate TSX_CTRL if present on the + * host so the guest can choose between disabling TSX or + * using VERW to clear CPU buffers. 
+ */ + } return data; } @@ -9616,6 +9624,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) */ if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) return false; + if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits) + return false; } else { /* * Not in 64-bit mode: EFER.LMA is clear and the code @@ -9993,6 +10003,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) fx_init(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; @@ -10494,7 +10505,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, return 0; old_npages = slot->npages; - hva = 0; + hva = slot->userspace_addr; } for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c5ee0f5ce0f1..0f727b50bd3d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -425,6 +425,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); __reserved_bits |= X86_CR4_UMIP; \ if (!__cpu_has(__c, X86_FEATURE_VMX)) \ __reserved_bits |= X86_CR4_VMXE; \ + if (!__cpu_has(__c, X86_FEATURE_PCID)) \ + __reserved_bits |= X86_CR4_PCIDE; \ __reserved_bits; \ }) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index c79e5736ab2b..c3d5f0236f35 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -382,6 +382,7 @@ bool sev_active(void) { return sev_status & MSR_AMD64_SEV_ENABLED; } +EXPORT_SYMBOL_GPL(sev_active); /* Needs to be called from non-instrumentable code */ bool noinstr sev_es_active(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e1e8d4e3a213..8efd003540ca 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -115,31 +115,12 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - /* - * We can share all PGD entries apart from the one entry 
that - * covers the EFI runtime mapping space. - * - * Make sure the EFI runtime region mappings are guaranteed to - * only span a single PGD entry and that the entry also maps - * other important kernel regions. - */ - MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != - (EFI_VA_END & PGDIR_MASK)); - pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); pgd_k = pgd_offset_k(PAGE_OFFSET); num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - /* - * As with PGDs, we share all P4D entries apart from the one entry - * that covers the EFI runtime mapping space. - */ - BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK)); - pgd_efi = efi_pgd + pgd_index(EFI_VA_END); pgd_k = pgd_offset_k(EFI_VA_END); p4d_efi = p4d_offset(pgd_efi, 0); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e4eb0fc1c16..9e81d1052091 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); + bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. 
*/ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index d2c8d8279e7a..24c197d91f29 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -495,8 +495,9 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, union acpi_operand_object **return_object_ptr) { union acpi_operand_object *return_object = *return_object_ptr; - char *dest; + union acpi_operand_object *new_string; char *source; + char *dest; ACPI_FUNCTION_NAME(ns_repair_HID); @@ -517,6 +518,13 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, return_ACPI_STATUS(AE_OK); } + /* It is simplest to always create a new string object */ + + new_string = acpi_ut_create_string_object(return_object->string.length); + if (!new_string) { + return_ACPI_STATUS(AE_NO_MEMORY); + } + /* * Remove a leading asterisk if present. For some unknown reason, there * are many machines in the field that contains IDs like this. @@ -526,7 +534,7 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, source = return_object->string.pointer; if (*source == '*') { source++; - return_object->string.length--; + new_string->string.length--; ACPI_DEBUG_PRINT((ACPI_DB_REPAIR, "%s: Removed invalid leading asterisk\n", @@ -541,11 +549,12 @@ acpi_ns_repair_HID(struct acpi_evaluate_info *info, * "NNNN####" where N is an uppercase letter or decimal digit, and * # is a hex digit. 
*/ - for (dest = return_object->string.pointer; *source; dest++, source++) { + for (dest = new_string->string.pointer; *source; dest++, source++) { *dest = (char)toupper((int)*source); } - return_object->string.pointer[return_object->string.length] = 0; + acpi_ut_remove_reference(return_object); + *return_object_ptr = new_string; return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b11b08a60684..8c5dde628405 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2269,40 +2269,24 @@ static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { /* enough info to uniquely specify an interleave set */ struct nfit_set_info { - struct nfit_set_info_map { - u64 region_offset; - u32 serial_number; - u32 pad; - } mapping[0]; + u64 region_offset; + u32 serial_number; + u32 pad; }; struct nfit_set_info2 { - struct nfit_set_info_map2 { - u64 region_offset; - u32 serial_number; - u16 vendor_id; - u16 manufacturing_date; - u8 manufacturing_location; - u8 reserved[31]; - } mapping[0]; + u64 region_offset; + u32 serial_number; + u16 vendor_id; + u16 manufacturing_date; + u8 manufacturing_location; + u8 reserved[31]; }; -static size_t sizeof_nfit_set_info(int num_mappings) -{ - return sizeof(struct nfit_set_info) - + num_mappings * sizeof(struct nfit_set_info_map); -} - -static size_t sizeof_nfit_set_info2(int num_mappings) -{ - return sizeof(struct nfit_set_info2) - + num_mappings * sizeof(struct nfit_set_info_map2); -} - static int cmp_map_compat(const void *m0, const void *m1) { - const struct nfit_set_info_map *map0 = m0; - const struct nfit_set_info_map *map1 = m1; + const struct nfit_set_info *map0 = m0; + const struct nfit_set_info *map1 = m1; return memcmp(&map0->region_offset, &map1->region_offset, sizeof(u64)); @@ -2310,8 +2294,8 @@ static int cmp_map_compat(const void *m0, const void *m1) static int cmp_map(const void *m0, const void *m1) { - const struct nfit_set_info_map *map0 = m0; - const 
struct nfit_set_info_map *map1 = m1; + const struct nfit_set_info *map0 = m0; + const struct nfit_set_info *map1 = m1; if (map0->region_offset < map1->region_offset) return -1; @@ -2322,8 +2306,8 @@ static int cmp_map(const void *m0, const void *m1) static int cmp_map2(const void *m0, const void *m1) { - const struct nfit_set_info_map2 *map0 = m0; - const struct nfit_set_info_map2 *map1 = m1; + const struct nfit_set_info2 *map0 = m0; + const struct nfit_set_info2 *map1 = m1; if (map0->region_offset < map1->region_offset) return -1; @@ -2361,22 +2345,22 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, return -ENOMEM; import_guid(&nd_set->type_guid, spa->range_guid); - info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); + info = devm_kcalloc(dev, nr, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL); + info2 = devm_kcalloc(dev, nr, sizeof(*info2), GFP_KERNEL); if (!info2) return -ENOMEM; for (i = 0; i < nr; i++) { struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; - struct nfit_set_info_map *map = &info->mapping[i]; - struct nfit_set_info_map2 *map2 = &info2->mapping[i]; struct nvdimm *nvdimm = mapping->nvdimm; struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); - struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, - spa->range_index, i); + struct nfit_set_info *map = &info[i]; + struct nfit_set_info2 *map2 = &info2[i]; + struct acpi_nfit_memory_map *memdev = + memdev_from_spa(acpi_desc, spa->range_index, i); struct acpi_nfit_control_region *dcr = nfit_mem->dcr; if (!memdev || !nfit_mem->dcr) { @@ -2395,23 +2379,20 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, } /* v1.1 namespaces */ - sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), - cmp_map, NULL); - nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + sort(info, nr, sizeof(*info), cmp_map, NULL); + nd_set->cookie1 = 
nd_fletcher64(info, sizeof(*info) * nr, 0); /* v1.2 namespaces */ - sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2), - cmp_map2, NULL); - nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0); + sort(info2, nr, sizeof(*info2), cmp_map2, NULL); + nd_set->cookie2 = nd_fletcher64(info2, sizeof(*info2) * nr, 0); /* support v1.1 namespaces created with the wrong sort order */ - sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), - cmp_map_compat, NULL); - nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + sort(info, nr, sizeof(*info), cmp_map_compat, NULL); + nd_set->altcookie = nd_fletcher64(info, sizeof(*info) * nr, 0); /* record the result of the sort for the mapping position */ for (i = 0; i < nr; i++) { - struct nfit_set_info_map2 *map2 = &info2->mapping[i]; + struct nfit_set_info2 *map2 = &info2[i]; int j; for (j = 0; j < nr; j++) { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1db063b02f63..22566b4b3150 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2123,12 +2123,12 @@ void acpi_walk_dep_device_list(acpi_handle handle) list_for_each_entry_safe(dep, tmp, &acpi_dep_list, node) { if (dep->supplier == handle) { acpi_bus_get_device(dep->consumer, &adev); - if (!adev) - continue; - adev->dep_unmet--; - if (!adev->dep_unmet) - acpi_bus_attach(adev, true); + if (adev) { + adev->dep_unmet--; + if (!adev->dep_unmet) + acpi_bus_attach(adev, true); + } list_del(&dep->node); kfree(dep); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 9860d4842f36..c2aaf690352c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -245,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref, if (req_prod - rsp_prod > size) goto fail; - err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid, + err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->be->dev, evtchn, xen_blkif_be_int, 
0, "blkif-backend", ring); if (err < 0) goto fail; diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 41ff2071d7ef..88ce5f0ffc4b 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -437,38 +437,31 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver tlv = (struct intel_tlv *)skb->data; switch (tlv->type) { case INTEL_TLV_CNVI_TOP: - version->cnvi_top = - __le32_to_cpu(get_unaligned_le32(tlv->val)); + version->cnvi_top = get_unaligned_le32(tlv->val); break; case INTEL_TLV_CNVR_TOP: - version->cnvr_top = - __le32_to_cpu(get_unaligned_le32(tlv->val)); + version->cnvr_top = get_unaligned_le32(tlv->val); break; case INTEL_TLV_CNVI_BT: - version->cnvi_bt = - __le32_to_cpu(get_unaligned_le32(tlv->val)); + version->cnvi_bt = get_unaligned_le32(tlv->val); break; case INTEL_TLV_CNVR_BT: - version->cnvr_bt = - __le32_to_cpu(get_unaligned_le32(tlv->val)); + version->cnvr_bt = get_unaligned_le32(tlv->val); break; case INTEL_TLV_DEV_REV_ID: - version->dev_rev_id = - __le16_to_cpu(get_unaligned_le16(tlv->val)); + version->dev_rev_id = get_unaligned_le16(tlv->val); break; case INTEL_TLV_IMAGE_TYPE: version->img_type = tlv->val[0]; break; case INTEL_TLV_TIME_STAMP: - version->timestamp = - __le16_to_cpu(get_unaligned_le16(tlv->val)); + version->timestamp = get_unaligned_le16(tlv->val); break; case INTEL_TLV_BUILD_TYPE: version->build_type = tlv->val[0]; break; case INTEL_TLV_BUILD_NUM: - version->build_num = - __le32_to_cpu(get_unaligned_le32(tlv->val)); + version->build_num = get_unaligned_le32(tlv->val); break; case INTEL_TLV_SECURE_BOOT: version->secure_boot = tlv->val[0]; diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 5f9f02795631..9872ef18f9fe 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -442,15 +442,15 @@ static int btmtksdio_rx_packet(struct btmtksdio_dev *bdev, u16 rx_size) } switch ((&pkts[i])->lsize) { - case 1: - dlen = 
skb->data[(&pkts[i])->loff]; - break; - case 2: - dlen = get_unaligned_le16(skb->data + + case 1: + dlen = skb->data[(&pkts[i])->loff]; + break; + case 2: + dlen = get_unaligned_le16(skb->data + (&pkts[i])->loff); - break; - default: - goto err_kfree_skb; + break; + default: + goto err_kfree_skb; } pad_size = skb->len - (&pkts[i])->hlen - dlen; diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index f85a55add9be..25114f0d1319 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -94,6 +94,53 @@ out: } EXPORT_SYMBOL_GPL(qca_read_soc_version); +static int qca_read_fw_build_info(struct hci_dev *hdev) +{ + struct sk_buff *skb; + struct edl_event_hdr *edl; + char cmd, build_label[QCA_FW_BUILD_VER_LEN]; + int build_lbl_len, err = 0; + + bt_dev_dbg(hdev, "QCA read fw build info"); + + cmd = EDL_GET_BUILD_INFO_CMD; + skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN, + &cmd, 0, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + bt_dev_err(hdev, "Reading QCA fw build info failed (%d)", + err); + return err; + } + + edl = (struct edl_event_hdr *)(skb->data); + if (!edl) { + bt_dev_err(hdev, "QCA read fw build info with no header"); + err = -EILSEQ; + goto out; + } + + if (edl->cresp != EDL_CMD_REQ_RES_EVT || + edl->rtype != EDL_GET_BUILD_INFO_CMD) { + bt_dev_err(hdev, "QCA Wrong packet received %d %d", edl->cresp, + edl->rtype); + err = -EIO; + goto out; + } + + build_lbl_len = edl->data[0]; + if (build_lbl_len <= QCA_FW_BUILD_VER_LEN - 1) { + memcpy(build_label, edl->data + 1, build_lbl_len); + *(build_label + build_lbl_len) = '\0'; + } + + hci_set_fw_info(hdev, "%s", build_label); + +out: + kfree_skb(skb); + return err; +} + static int qca_send_reset(struct hci_dev *hdev) { struct sk_buff *skb; @@ -517,6 +564,19 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } + /* WCN399x supports the Microsoft vendor extension with 0xFD70 as the + * VsMsftOpCode. 
+ */ + switch (soc_type) { + case QCA_WCN3990: + case QCA_WCN3991: + case QCA_WCN3998: + hci_set_msft_opcode(hdev, 0xFD70); + break; + default: + break; + } + /* Perform HCI reset */ err = qca_send_reset(hdev); if (err < 0) { @@ -524,6 +584,13 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, return err; } + if (soc_type == QCA_WCN3991) { + /* get fw build info */ + err = qca_read_fw_build_info(hdev); + if (err < 0) + return err; + } + bt_dev_info(hdev, "QCA setup on UART is completed"); return 0; diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h index e73b8f8775bd..b19add7675a4 100644 --- a/drivers/bluetooth/btqca.h +++ b/drivers/bluetooth/btqca.h @@ -11,6 +11,7 @@ #define EDL_PATCH_CMD_LEN (1) #define EDL_PATCH_VER_REQ_CMD (0x19) #define EDL_PATCH_TLV_REQ_CMD (0x1E) +#define EDL_GET_BUILD_INFO_CMD (0x20) #define EDL_NVM_ACCESS_SET_REQ_CMD (0x01) #define MAX_SIZE_PER_TLV_SEGMENT (243) #define QCA_PRE_SHUTDOWN_CMD (0xFC08) diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index 98d53764871f..2acb719e596f 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -142,12 +142,16 @@ static int btqcomsmd_probe(struct platform_device *pdev) btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", btqcomsmd_cmd_callback, btq); - if (IS_ERR(btq->cmd_channel)) - return PTR_ERR(btq->cmd_channel); + if (IS_ERR(btq->cmd_channel)) { + ret = PTR_ERR(btq->cmd_channel); + goto destroy_acl_channel; + } hdev = hci_alloc_dev(); - if (!hdev) - return -ENOMEM; + if (!hdev) { + ret = -ENOMEM; + goto destroy_cmd_channel; + } hci_set_drvdata(hdev, btq); btq->hdev = hdev; @@ -161,14 +165,21 @@ static int btqcomsmd_probe(struct platform_device *pdev) hdev->set_bdaddr = qca_set_bdaddr_rome; ret = hci_register_dev(hdev); - if (ret < 0) { - hci_free_dev(hdev); - return ret; - } + if (ret < 0) + goto hci_free_dev; platform_set_drvdata(pdev, btq); return 0; + +hci_free_dev: + hci_free_dev(hdev); 
+destroy_cmd_channel: + rpmsg_destroy_ept(btq->cmd_channel); +destroy_acl_channel: + rpmsg_destroy_ept(btq->acl_channel); + + return ret; } static int btqcomsmd_remove(struct platform_device *pdev) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index a4f7cace66b0..e7fe5fb22753 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -38,6 +38,19 @@ .hci_ver = (hciv), \ .hci_bus = (bus) +enum btrtl_chip_id { + CHIP_ID_8723A, + CHIP_ID_8723B, + CHIP_ID_8821A, + CHIP_ID_8761A, + CHIP_ID_8822B = 8, + CHIP_ID_8723D, + CHIP_ID_8821C, + CHIP_ID_8822C = 13, + CHIP_ID_8761B, + CHIP_ID_8852A = 18, +}; + struct id_table { __u16 match_flags; __u16 lmp_subver; @@ -58,6 +71,7 @@ struct btrtl_device_info { u8 *cfg_data; int cfg_len; bool drop_fw; + int project_id; }; static const struct id_table ic_id_table[] = { @@ -307,8 +321,10 @@ static int rtlbt_parse_firmware(struct hci_dev *hdev, /* Find project_id in table */ for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) { - if (project_id == project_id_to_lmp_subver[i].id) + if (project_id == project_id_to_lmp_subver[i].id) { + btrtl_dev->project_id = project_id; break; + } } if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) { @@ -658,6 +674,12 @@ out_free: } } + /* RTL8822CE supports the Microsoft vendor extension and uses 0xFCF0 + * for VsMsftOpCode. + */ + if (lmp_subver == RTL_ROM_LMP_8822B) + hci_set_msft_opcode(hdev, 0xFCF0); + return btrtl_dev; err_free: @@ -708,13 +730,28 @@ int btrtl_setup_realtek(struct hci_dev *hdev) ret = btrtl_download_firmware(hdev, btrtl_dev); - btrtl_free(btrtl_dev); - /* Enable controller to do both LE scan and BR/EDR inquiry * simultaneously. */ set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + /* Enable central-peripheral role (able to create new connections with + * an existing connection in slave role). + */ + /* Enable WBS supported for the specific Realtek devices. 
*/ + switch (btrtl_dev->project_id) { + case CHIP_ID_8822C: + case CHIP_ID_8852A: + set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); + break; + default: + rtl_dev_dbg(hdev, "Central-peripheral role not enabled."); + rtl_dev_dbg(hdev, "WBS supported not enabled."); + break; + } + + btrtl_free(btrtl_dev); return ret; } EXPORT_SYMBOL_GPL(btrtl_setup_realtek); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 03b83aa91277..52683fd22e05 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -368,6 +368,8 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH }, { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_NEWGEN | BTUSB_WIDEBAND_SPEECH}, + { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_NEWGEN | + BTUSB_WIDEBAND_SPEECH}, { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, @@ -506,7 +508,6 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { #define BTUSB_HW_RESET_ACTIVE 12 #define BTUSB_TX_WAIT_VND_EVT 13 #define BTUSB_WAKEUP_DISABLE 14 -#define BTUSB_USE_ALT1_FOR_WBS 15 struct btusb_data { struct hci_dev *hdev; @@ -1736,15 +1737,12 @@ static void btusb_work(struct work_struct *work) new_alts = data->sco_num; } } else if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_TRANSP) { - /* Check if Alt 6 is supported for Transparent audio */ - if (btusb_find_altsetting(data, 6)) { - data->usb_alt6_packet_flow = true; - new_alts = 6; - } else if (test_bit(BTUSB_USE_ALT1_FOR_WBS, &data->flags)) { - new_alts = 1; - } else { - bt_dev_err(hdev, "Device does not support ALT setting 6"); - } + /* Bluetooth USB spec recommends alt 6 (63 bytes), but + * many adapters do not support it. Alt 1 appears to + * work for all adapters that do not have alt 6, and + * which work with WBS at all. 
+ */ + new_alts = btusb_find_altsetting(data, 6) ? 6 : 1; } if (btusb_switch_alt_setting(hdev, new_alts) < 0) @@ -1903,7 +1901,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) le16_to_cpu(rp->lmp_subver) == 0x1012 && le16_to_cpu(rp->hci_rev) == 0x0810 && le16_to_cpu(rp->hci_ver) == BLUETOOTH_VER_4_0) { - bt_dev_warn(hdev, "CSR: detected a fake CSR dongle using a Barrot 8041a02 chip, this chip is very buggy and may have issues\n"); + bt_dev_warn(hdev, "CSR: detected a fake CSR dongle using a Barrot 8041a02 chip, this chip is very buggy and may have issues"); pm_runtime_allow(&data->udev->dev); @@ -1911,7 +1909,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) if (ret >= 0) msleep(200); else - bt_dev_err(hdev, "Failed to suspend the device for Barrot 8041a02 receive-issue workaround\n"); + bt_dev_err(hdev, "Failed to suspend the device for Barrot 8041a02 receive-issue workaround"); pm_runtime_forbid(&data->udev->dev); @@ -2924,7 +2922,10 @@ finish: * extension are using 0xFC1E for VsMsftOpCode. 
*/ switch (ver.hw_variant) { + case 0x11: /* JfP */ case 0x12: /* ThP */ + case 0x13: /* HrP */ + case 0x14: /* CcP */ hci_set_msft_opcode(hdev, 0xFC1E); break; } @@ -3127,6 +3128,12 @@ static int btusb_shutdown_intel_new(struct hci_dev *hdev) #define FIRMWARE_MT7668 "mediatek/mt7668pr2h.bin" #define HCI_WMT_MAX_EVENT_SIZE 64 +/* It is for mt79xx download rom patch*/ +#define MTK_FW_ROM_PATCH_HEADER_SIZE 32 +#define MTK_FW_ROM_PATCH_GD_SIZE 64 +#define MTK_FW_ROM_PATCH_SEC_MAP_SIZE 64 +#define MTK_SEC_MAP_COMMON_SIZE 12 +#define MTK_SEC_MAP_NEED_SEND_SIZE 52 enum { BTMTK_WMT_PATCH_DWNLD = 0x1, @@ -3138,6 +3145,7 @@ enum { enum { BTMTK_WMT_INVALID, BTMTK_WMT_PATCH_UNDONE, + BTMTK_WMT_PATCH_PROGRESS, BTMTK_WMT_PATCH_DONE, BTMTK_WMT_ON_UNDONE, BTMTK_WMT_ON_DONE, @@ -3153,7 +3161,7 @@ struct btmtk_wmt_hdr { struct btmtk_hci_wmt_cmd { struct btmtk_wmt_hdr hdr; - u8 data[256]; + u8 data[]; } __packed; struct btmtk_hci_wmt_evt { @@ -3182,6 +3190,40 @@ struct btmtk_hci_wmt_params { u32 *status; }; +struct btmtk_patch_header { + u8 datetime[16]; + u8 platform[4]; + __le16 hwver; + __le16 swver; + __le32 magicnum; +} __packed; + +struct btmtk_global_desc { + __le32 patch_ver; + __le32 sub_sys; + __le32 feature_opt; + __le32 section_num; +} __packed; + +struct btmtk_section_map { + __le32 sectype; + __le32 secoffset; + __le32 secsize; + union { + __le32 u4SecSpec[13]; + struct { + __le32 dlAddr; + __le32 dlsize; + __le32 seckeyidx; + __le32 alignlen; + __le32 sectype; + __le32 dlmodecrctype; + __le32 crc; + __le32 reserved[6]; + } bin_info_spec; + }; +} __packed; + static void btusb_mtk_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; @@ -3199,7 +3241,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; - goto err_out; + return; } hci_skb_pkt_type(skb) = HCI_EVENT_PKT; @@ -3217,13 +3259,18 @@ static void btusb_mtk_wmt_recv(struct urb *urb) */ if 
(test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { data->evt_skb = skb_clone(skb, GFP_ATOMIC); - if (!data->evt_skb) - goto err_out; + if (!data->evt_skb) { + kfree_skb(skb); + return; + } } err = hci_recv_frame(hdev, skb); - if (err < 0) - goto err_free_skb; + if (err < 0) { + kfree_skb(data->evt_skb); + data->evt_skb = NULL; + return; + } if (test_and_clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { @@ -3232,11 +3279,6 @@ static void btusb_mtk_wmt_recv(struct urb *urb) wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } -err_out: - return; -err_free_skb: - kfree_skb(data->evt_skb); - data->evt_skb = NULL; return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ @@ -3252,7 +3294,7 @@ err_free_skb: * to generate the event. Otherwise, the WMT event cannot return from * the device successfully. */ - udelay(100); + udelay(500); usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); @@ -3327,7 +3369,7 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, struct btmtk_hci_wmt_evt_funcc *wmt_evt_funcc; u32 hlen, status = BTMTK_WMT_INVALID; struct btmtk_hci_wmt_evt *wmt_evt; - struct btmtk_hci_wmt_cmd wc; + struct btmtk_hci_wmt_cmd *wc; struct btmtk_wmt_hdr *hdr; int err; @@ -3341,20 +3383,24 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, if (hlen > 255) return -EINVAL; - hdr = (struct btmtk_wmt_hdr *)&wc; + wc = kzalloc(hlen, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + hdr = &wc->hdr; hdr->dir = 1; hdr->op = wmt_params->op; hdr->dlen = cpu_to_le16(wmt_params->dlen + 1); hdr->flag = wmt_params->flag; - memcpy(wc.data, wmt_params->data, wmt_params->dlen); + memcpy(wc->data, wmt_params->data, wmt_params->dlen); set_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc); + err = __hci_cmd_send(hdev, 0xfc6f, hlen, wc); if (err < 0) { clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return err; + goto err_free_wc; } /* The vendor specific WMT commands are all 
answered by a vendor @@ -3371,13 +3417,14 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return err; + goto err_free_wc; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags); - return -ETIMEDOUT; + err = -ETIMEDOUT; + goto err_free_wc; } /* Parse and handle the return WMT event */ @@ -3405,6 +3452,14 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, else status = BTMTK_WMT_ON_UNDONE; break; + case BTMTK_WMT_PATCH_DWNLD: + if (wmt_evt->whdr.flag == 2) + status = BTMTK_WMT_PATCH_DONE; + else if (wmt_evt->whdr.flag == 1) + status = BTMTK_WMT_PATCH_PROGRESS; + else + status = BTMTK_WMT_PATCH_UNDONE; + break; } if (wmt_params->status) @@ -3413,6 +3468,119 @@ static int btusb_mtk_hci_wmt_sync(struct hci_dev *hdev, err_free_skb: kfree_skb(data->evt_skb); data->evt_skb = NULL; +err_free_wc: + kfree(wc); + return err; +} + +static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwname) +{ + struct btmtk_hci_wmt_params wmt_params; + struct btmtk_global_desc *globaldesc = NULL; + struct btmtk_section_map *sectionmap; + const struct firmware *fw; + const u8 *fw_ptr; + const u8 *fw_bin_ptr; + int err, dlen, i, status; + u8 flag, first_block, retry; + u32 section_num, dl_size, section_offset; + u8 cmd[64]; + + err = request_firmware(&fw, fwname, &hdev->dev); + if (err < 0) { + bt_dev_err(hdev, "Failed to load firmware file (%d)", err); + return err; + } + + fw_ptr = fw->data; + fw_bin_ptr = fw_ptr; + globaldesc = (struct btmtk_global_desc *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE); + section_num = globaldesc->section_num; + + for (i = 0; i < section_num; i++) { + first_block = 1; + fw_ptr = fw_bin_ptr; + sectionmap = (struct btmtk_section_map *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE + + MTK_FW_ROM_PATCH_GD_SIZE + MTK_FW_ROM_PATCH_SEC_MAP_SIZE * i); + 
+ section_offset = sectionmap->secoffset; + dl_size = sectionmap->bin_info_spec.dlsize; + + if (dl_size > 0) { + retry = 20; + while (retry > 0) { + cmd[0] = 0; /* 0 means legacy dl mode. */ + memcpy(cmd + 1, + fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE + + MTK_FW_ROM_PATCH_GD_SIZE + MTK_FW_ROM_PATCH_SEC_MAP_SIZE * i + + MTK_SEC_MAP_COMMON_SIZE, + MTK_SEC_MAP_NEED_SEND_SIZE + 1); + + wmt_params.op = BTMTK_WMT_PATCH_DWNLD; + wmt_params.status = &status; + wmt_params.flag = 0; + wmt_params.dlen = MTK_SEC_MAP_NEED_SEND_SIZE + 1; + wmt_params.data = &cmd; + + err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); + if (err < 0) { + bt_dev_err(hdev, "Failed to send wmt patch dwnld (%d)", + err); + goto err_release_fw; + } + + if (status == BTMTK_WMT_PATCH_UNDONE) { + break; + } else if (status == BTMTK_WMT_PATCH_PROGRESS) { + msleep(100); + retry--; + } else if (status == BTMTK_WMT_PATCH_DONE) { + goto next_section; + } else { + bt_dev_err(hdev, "Failed wmt patch dwnld status (%d)", + status); + goto err_release_fw; + } + } + + fw_ptr += section_offset; + wmt_params.op = BTMTK_WMT_PATCH_DWNLD; + wmt_params.status = NULL; + + while (dl_size > 0) { + dlen = min_t(int, 250, dl_size); + if (first_block == 1) { + flag = 1; + first_block = 0; + } else if (dl_size - dlen <= 0) { + flag = 3; + } else { + flag = 2; + } + + wmt_params.flag = flag; + wmt_params.dlen = dlen; + wmt_params.data = fw_ptr; + + err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); + if (err < 0) { + bt_dev_err(hdev, "Failed to send wmt patch dwnld (%d)", + err); + goto err_release_fw; + } + + dl_size -= dlen; + fw_ptr += dlen; + } + } +next_section: + continue; + } + /* Wait a few moments for firmware activation done */ + usleep_range(100000, 120000); + +err_release_fw: + release_firmware(fw); return err; } @@ -3465,7 +3633,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) while (fw_size > 0) { dlen = min_t(int, 250, fw_size); - /* Tell deivice the position in sequence */ + /* Tell 
device the position in sequence */ if (fw_size - dlen <= 0) flag = 3; else if (fw_size < fw->size - 30) @@ -3555,9 +3723,9 @@ err_free_buf: return err; } -static int btusb_mtk_id_get(struct btusb_data *data, u32 *id) +static int btusb_mtk_id_get(struct btusb_data *data, u32 reg, u32 *id) { - return btusb_mtk_reg_read(data, 0x80000008, id); + return btusb_mtk_reg_read(data, reg, id); } static int btusb_mtk_setup(struct hci_dev *hdev) @@ -3571,16 +3739,31 @@ static int btusb_mtk_setup(struct hci_dev *hdev) const char *fwname; int err, status; u32 dev_id; + char fw_bin_name[64]; + u32 fw_version; u8 param; calltime = ktime_get(); - err = btusb_mtk_id_get(data, &dev_id); + err = btusb_mtk_id_get(data, 0x80000008, &dev_id); if (err < 0) { bt_dev_err(hdev, "Failed to get device id (%d)", err); return err; } + if (!dev_id) { + err = btusb_mtk_id_get(data, 0x70010200, &dev_id); + if (err < 0) { + bt_dev_err(hdev, "Failed to get device id (%d)", err); + return err; + } + err = btusb_mtk_id_get(data, 0x80021004, &fw_version); + if (err < 0) { + bt_dev_err(hdev, "Failed to get fw version (%d)", err); + return err; + } + } + switch (dev_id) { case 0x7663: fwname = FIRMWARE_MT7663; @@ -3588,8 +3771,28 @@ static int btusb_mtk_setup(struct hci_dev *hdev) case 0x7668: fwname = FIRMWARE_MT7668; break; + case 0x7961: + snprintf(fw_bin_name, sizeof(fw_bin_name), + "mediatek/BT_RAM_CODE_MT%04x_1_%x_hdr.bin", + dev_id & 0xffff, (fw_version & 0xff) + 1); + err = btusb_mtk_setup_firmware_79xx(hdev, fw_bin_name); + + /* Enable Bluetooth protocol */ + param = 1; + wmt_params.op = BTMTK_WMT_FUNC_CTRL; + wmt_params.flag = 0; + wmt_params.dlen = sizeof(param); + wmt_params.data = ¶m; + wmt_params.status = NULL; + + err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); + if (err < 0) { + bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); + return err; + } + goto done; default: - bt_dev_err(hdev, "Unsupported support hardware variant (%08x)", + bt_dev_err(hdev, "Unsupported hardware variant 
(%08x)", dev_id); return -ENODEV; } @@ -3665,6 +3868,7 @@ ignore_func_on: } kfree_skb(skb); +done: rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long)ktime_to_ns(delta) >> 10; @@ -3725,7 +3929,7 @@ static int marvell_config_oob_wake(struct hci_dev *hdev) skb = bt_skb_alloc(sizeof(cmd), GFP_KERNEL); if (!skb) { - bt_dev_err(hdev, "%s: No memory\n", __func__); + bt_dev_err(hdev, "%s: No memory", __func__); return -ENOMEM; } @@ -3734,7 +3938,7 @@ static int marvell_config_oob_wake(struct hci_dev *hdev) ret = btusb_send_frame(hdev, skb); if (ret) { - bt_dev_err(hdev, "%s: configuration failed\n", __func__); + bt_dev_err(hdev, "%s: configuration failed", __func__); kfree_skb(skb); return ret; } @@ -4069,6 +4273,13 @@ static int btusb_setup_qca(struct hci_dev *hdev) info = &qca_devices_table[i]; } if (!info) { + /* If the rom_version is not matched in the qca_devices_table + * and the high ROM version is not zero, we assume this chip no + * need to load the rampatch and nvm. 
+ */ + if (ver_rom & ~0xffffU) + return 0; + bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom); return -ENODEV; } @@ -4264,6 +4475,20 @@ static bool btusb_prevent_wake(struct hci_dev *hdev) return !device_may_wakeup(&data->udev->dev); } +static int btusb_shutdown_qca(struct hci_dev *hdev) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT); + if (IS_ERR(skb)) { + bt_dev_err(hdev, "HCI reset during shutdown failed"); + return PTR_ERR(skb); + } + kfree_skb(skb); + + return 0; +} + static int btusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { @@ -4523,6 +4748,7 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_QCA_WCN6855) { data->setup_on_usb = btusb_setup_qca; + hdev->shutdown = btusb_shutdown_qca; hdev->set_bdaddr = btusb_set_bdaddr_wcn6855; hdev->cmd_timeout = btusb_qca_cmd_timeout; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); @@ -4548,10 +4774,6 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); - if (btusb_find_altsetting(data, 1)) - set_bit(BTUSB_USE_ALT1_FOR_WBS, &data->flags); - else - bt_dev_err(hdev, "Device does not support ALT setting 1"); } if (!reset) @@ -4627,8 +4849,8 @@ static int btusb_probe(struct usb_interface *intf, data->diag = NULL; } - if (enable_autosuspend) - usb_enable_autosuspend(data->udev); + if (!enable_autosuspend) + usb_disable_autosuspend(data->udev); err = hci_register_dev(hdev); if (err < 0) @@ -4688,6 +4910,9 @@ static void btusb_disconnect(struct usb_interface *intf) gpiod_put(data->reset_gpio); hci_free_dev(hdev); + + if (!enable_autosuspend) + usb_enable_autosuspend(data->udev); } #ifdef CONFIG_PM diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 8ea5ca8d71d6..3764ceb6fa0d 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -654,6 +654,7 @@ static const struct h4_recv_pkt 
bcm_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, + { H4_RECV_ISO, .recv = hci_recv_frame }, { BCM_RECV_LM_DIAG, .recv = hci_recv_diag }, { BCM_RECV_NULL, .recv = hci_recv_diag }, { BCM_RECV_TYPE49, .recv = hci_recv_diag }, diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 7be16a7f653b..27e96681d583 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -906,6 +906,11 @@ static int h5_btrtl_setup(struct h5 *h5) /* Give the device some time before the hci-core sends it a reset */ usleep_range(10000, 20000); + /* Enable controller to do both LE scan and BR/EDR inquiry + * simultaneously. + */ + set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &h5->hu->hdev->quirks); + out_free: btrtl_free(btrtl_dev); @@ -1022,6 +1027,8 @@ static const struct of_device_id rtl_bluetooth_of_match[] = { .data = (const void *)&rtl_vnd }, { .compatible = "realtek,rtl8723bs-bt", .data = (const void *)&rtl_vnd }, + { .compatible = "realtek,rtl8723ds-bt", + .data = (const void *)&rtl_vnd }, #endif { }, }; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index f83d67eafc9f..8be4d807d137 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -127,10 +127,9 @@ int hci_uart_tx_wakeup(struct hci_uart *hu) if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; - if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) { - set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); + set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); + if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) goto no_schedule; - } BT_DBG(""); @@ -174,10 +173,10 @@ restart: kfree_skb(skb); } + clear_bit(HCI_UART_SENDING, &hu->tx_state); if (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)) goto restart; - clear_bit(HCI_UART_SENDING, &hu->tx_state); wake_up_bit(&hu->tx_state, HCI_UART_SENDING); } diff --git a/drivers/bluetooth/hci_qca.c 
b/drivers/bluetooth/hci_qca.c index 4a963682c702..de36af63e182 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -50,7 +50,8 @@ #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000 #define CMD_TRANS_TIMEOUT_MS 100 #define MEMDUMP_TIMEOUT_MS 8000 -#define IBS_DISABLE_SSR_TIMEOUT_MS (MEMDUMP_TIMEOUT_MS + 1000) +#define IBS_DISABLE_SSR_TIMEOUT_MS \ + (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS) #define FW_DOWNLOAD_TIMEOUT_MS 3000 /* susclk rate */ @@ -76,7 +77,8 @@ enum qca_flags { QCA_MEMDUMP_COLLECTION, QCA_HW_ERROR_EVENT, QCA_SSR_TRIGGERED, - QCA_BT_OFF + QCA_BT_OFF, + QCA_ROM_FW }; enum qca_capabilities { @@ -1024,7 +1026,9 @@ static void qca_controller_memdump(struct work_struct *work) dump_size = __le32_to_cpu(dump->dump_size); if (!(dump_size)) { bt_dev_err(hu->hdev, "Rx invalid memdump size"); + kfree(qca_memdump); kfree_skb(skb); + qca->qca_memdump = NULL; mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1661,6 +1665,7 @@ static int qca_setup(struct hci_uart *hu) if (ret) return ret; + clear_bit(QCA_ROM_FW, &qca->flags); /* Patch downloading has to be done without IBS mode */ set_bit(QCA_IBS_DISABLED, &qca->flags); @@ -1718,12 +1723,14 @@ retry: hu->hdev->cmd_timeout = qca_cmd_timeout; } else if (ret == -ENOENT) { /* No patch/nvm-config found, run with original fw/config */ + set_bit(QCA_ROM_FW, &qca->flags); ret = 0; } else if (ret == -EAGAIN) { /* * Userspace firmware loader will return -EAGAIN in case no * patch/nvm-config is found, so run with original fw/config. 
*/ + set_bit(QCA_ROM_FW, &qca->flags); ret = 0; } @@ -2100,17 +2107,29 @@ static int __maybe_unused qca_suspend(struct device *dev) set_bit(QCA_SUSPENDING, &qca->flags); - if (test_bit(QCA_BT_OFF, &qca->flags)) + /* if BT SoC is running with default firmware then it does not + * support in-band sleep + */ + if (test_bit(QCA_ROM_FW, &qca->flags)) + return 0; + + /* During SSR after memory dump collection, controller will be + * powered off and then powered on.If controller is powered off + * during SSR then we should wait until SSR is completed. + */ + if (test_bit(QCA_BT_OFF, &qca->flags) && + !test_bit(QCA_SSR_TRIGGERED, &qca->flags)) return 0; - if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { + if (test_bit(QCA_IBS_DISABLED, &qca->flags) || + test_bit(QCA_SSR_TRIGGERED, &qca->flags)) { wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? IBS_DISABLE_SSR_TIMEOUT_MS : FW_DOWNLOAD_TIMEOUT_MS; /* QCA_IBS_DISABLED flag is set to true, During FW download * and during memory dump collection. It is reset to false, - * After FW download complete and after memory dump collections. + * After FW download complete. 
*/ wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); @@ -2122,10 +2141,6 @@ static int __maybe_unused qca_suspend(struct device *dev) } } - /* After memory dump collection, Controller is powered off.*/ - if (test_bit(QCA_BT_OFF, &qca->flags)) - return 0; - cancel_work_sync(&qca->ws_awake_device); cancel_work_sync(&qca->ws_awake_rx); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index ef96ad06fa54..9e03402ef1b3 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -83,9 +83,9 @@ static void hci_uart_write_work(struct work_struct *work) hci_uart_tx_complete(hu, hci_skb_pkt_type(skb)); kfree_skb(skb); } - } while (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)); - clear_bit(HCI_UART_SENDING, &hu->tx_state); + clear_bit(HCI_UART_SENDING, &hu->tx_state); + } while (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)); } /* ------- Interface to HCI layer ------ */ diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c index c5eb46cbf388..01a3d0cd08ed 100644 --- a/drivers/bus/simple-pm-bus.c +++ b/drivers/bus/simple-pm-bus.c @@ -16,6 +16,7 @@ static int simple_pm_bus_probe(struct platform_device *pdev) { + const struct of_dev_auxdata *lookup = dev_get_platdata(&pdev->dev); struct device_node *np = pdev->dev.of_node; dev_dbg(&pdev->dev, "%s\n", __func__); @@ -23,7 +24,7 @@ static int simple_pm_bus_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); if (np) - of_platform_populate(np, NULL, NULL, &pdev->dev); + of_platform_populate(np, NULL, lookup, &pdev->dev); return 0; } diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 1e4fbb002a31..d3e5a6fceb61 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -26,6 +26,7 @@ #include <linux/uaccess.h> #include <acpi/processor.h> +#include <acpi/cppc_acpi.h> #include <asm/msr.h> #include <asm/processor.h> @@ -53,6 +54,7 @@ struct 
acpi_cpufreq_data { unsigned int resume; unsigned int cpu_feature; unsigned int acpi_perf_cpu; + unsigned int first_perf_state; cpumask_var_t freqdomain_cpus; void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); u32 (*cpu_freq_read)(struct acpi_pct_register *reg); @@ -221,10 +223,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr) perf = to_perf_data(data); - cpufreq_for_each_entry(pos, policy->freq_table) + cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) if (msr == perf->states[pos->driver_data].status) return pos->frequency; - return policy->freq_table[0].frequency; + return policy->freq_table[data->first_perf_state].frequency; } static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) @@ -363,6 +365,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; + unsigned int state; pr_debug("%s (%d)\n", __func__, cpu); @@ -374,7 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) if (unlikely(!data || !policy->freq_table)) return 0; - cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; + state = to_perf_data(data)->state; + if (state < data->first_perf_state) + state = data->first_perf_state; + + cached_freq = policy->freq_table[state].frequency; freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); if (freq != cached_freq) { /* @@ -628,16 +635,54 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) } #endif +#ifdef CONFIG_ACPI_CPPC_LIB +static u64 get_max_boost_ratio(unsigned int cpu) +{ + struct cppc_perf_caps perf_caps; + u64 highest_perf, nominal_perf; + int ret; + + if (acpi_pstate_strict) + return 0; + + ret = cppc_get_perf_caps(cpu, &perf_caps); + if (ret) { + pr_debug("CPU%d: Unable to get performance capabilities (%d)\n", + cpu, ret); + return 0; + } + + highest_perf = perf_caps.highest_perf; + nominal_perf = perf_caps.nominal_perf; + + if (!highest_perf 
|| !nominal_perf) { + pr_debug("CPU%d: highest or nominal performance missing\n", cpu); + return 0; + } + + if (highest_perf < nominal_perf) { + pr_debug("CPU%d: nominal performance above highest\n", cpu); + return 0; + } + + return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); +} +#else +static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; } +#endif + static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) { - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; + struct cpufreq_frequency_table *freq_table; + struct acpi_processor_performance *perf; struct acpi_cpufreq_data *data; + unsigned int cpu = policy->cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned int valid_states = 0; unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data(policy->cpu); - struct acpi_processor_performance *perf; - struct cpufreq_frequency_table *freq_table; + unsigned int state_count; + u64 max_boost_ratio; + unsigned int i; #ifdef CONFIG_SMP static int blacklisted; #endif @@ -750,8 +795,28 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_unreg; } - freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), - GFP_KERNEL); + state_count = perf->state_count + 1; + + max_boost_ratio = get_max_boost_ratio(cpu); + if (max_boost_ratio) { + /* + * Make a room for one more entry to represent the highest + * available "boost" frequency. + */ + state_count++; + valid_states++; + data->first_perf_state = valid_states; + } else { + /* + * If the maximum "boost" frequency is unknown, ask the arch + * scale-invariance code to use the "nominal" performance for + * CPU utilization scaling so as to prevent the schedutil + * governor from selecting inadequate CPU frequencies. 
+ */ + arch_set_max_freq_ratio(true); + } + + freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); if (!freq_table) { result = -ENOMEM; goto err_unreg; @@ -785,6 +850,30 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) valid_states++; } freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + + if (max_boost_ratio) { + unsigned int state = data->first_perf_state; + unsigned int freq = freq_table[state].frequency; + + /* + * Because the loop above sorts the freq_table entries in the + * descending order, freq is the maximum frequency in the table. + * Assume that it corresponds to the CPPC nominal frequency and + * use it to populate the frequency field of the extra "boost" + * frequency entry. + */ + freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; + /* + * The purpose of the extra "boost" frequency entry is to make + * the rest of cpufreq aware of the real maximum frequency, but + * the way to request it is the same as for the first_perf_state + * entry that is expected to cover the entire range of "boost" + * frequencies of the CPU, so copy the driver_data value from + * that entry. 
+ */ + freq_table[0].driver_data = freq_table[state].driver_data; + } + policy->freq_table = freq_table; perf->state = 0; @@ -858,8 +947,10 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy) { struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; + unsigned int freq = policy->freq_table[data->first_perf_state].frequency; - if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) + if (perf->states[0].core_frequency * 1000 != freq) pr_warn(FW_WARN "P-state 0 is not max freq\n"); } diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 962cbb5e5f7f..fe6a460c4373 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1110,7 +1110,6 @@ static void __dma_async_device_channel_unregister(struct dma_device *device, "%s called while %d clients hold a reference\n", __func__, chan->client_count); mutex_lock(&dma_list_mutex); - list_del(&chan->device_node); device->chancnt--; chan->dev->chan = NULL; mutex_unlock(&dma_list_mutex); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 19a23767533a..7ab83fe601ed 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -982,11 +982,8 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s\n", __func__); - pm_runtime_get_sync(dw->dma.dev); - /* ASSERT: channel is idle */ if (dma_readl(dw, CH_EN) & dwc->mask) { - pm_runtime_put_sync_suspend(dw->dma.dev); dev_dbg(chan2dev(chan), "DMA channel not idle?\n"); return -EIO; } @@ -1003,7 +1000,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * We need controller-specific data to set up slave transfers. 
*/ if (chan->private && !dw_dma_filter(chan, chan->private)) { - pm_runtime_put_sync_suspend(dw->dma.dev); dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); return -EINVAL; } @@ -1047,8 +1043,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) if (!dw->in_use) do_dw_dma_off(dw); - pm_runtime_put_sync_suspend(dw->dma.dev); - dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 95f94a3ed6be..84a6ea60ecf0 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -398,17 +398,31 @@ static inline bool idxd_is_enabled(struct idxd_device *idxd) return false; } +static inline bool idxd_device_is_halted(struct idxd_device *idxd) +{ + union gensts_reg gensts; + + gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); + + return (gensts.state == IDXD_DEVICE_STATE_HALT); +} + /* * This is function is only used for reset during probe and will * poll for completion. Once the device is setup with interrupts, * all commands will be done via interrupt completion. 
*/ -void idxd_device_init_reset(struct idxd_device *idxd) +int idxd_device_init_reset(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; union idxd_command_reg cmd; unsigned long flags; + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + return -ENXIO; + } + memset(&cmd, 0, sizeof(cmd)); cmd.cmd = IDXD_CMD_RESET_DEVICE; dev_dbg(dev, "%s: sending reset for init.\n", __func__); @@ -419,6 +433,7 @@ void idxd_device_init_reset(struct idxd_device *idxd) IDXD_CMDSTS_ACTIVE) cpu_relax(); spin_unlock_irqrestore(&idxd->dev_lock, flags); + return 0; } static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, @@ -428,6 +443,12 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand, DECLARE_COMPLETION_ONSTACK(done); unsigned long flags; + if (idxd_device_is_halted(idxd)) { + dev_warn(&idxd->pdev->dev, "Device is HALTED!\n"); + *status = IDXD_CMDSTS_HW_ERR; + return; + } + memset(&cmd, 0, sizeof(cmd)); cmd.cmd = cmd_code; cmd.operand = operand; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 8ed2773d8285..71fd6e4c42cd 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -205,5 +205,8 @@ int idxd_register_dma_channel(struct idxd_wq *wq) void idxd_unregister_dma_channel(struct idxd_wq *wq) { - dma_async_device_channel_unregister(&wq->idxd->dma_dev, &wq->dma_chan); + struct dma_chan *chan = &wq->dma_chan; + + dma_async_device_channel_unregister(&wq->idxd->dma_dev, chan); + list_del(&chan->device_node); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 5a50e91c71bf..81a0e65fd316 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -326,7 +326,7 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id); void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id); /* device control */ -void idxd_device_init_reset(struct idxd_device *idxd); +int idxd_device_init_reset(struct idxd_device *idxd); 
int idxd_device_enable(struct idxd_device *idxd); int idxd_device_disable(struct idxd_device *idxd); void idxd_device_reset(struct idxd_device *idxd); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2c051e07c34c..fa04acd5582a 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -335,7 +335,10 @@ static int idxd_probe(struct idxd_device *idxd) int rc; dev_dbg(dev, "%s entered and resetting device\n", __func__); - idxd_device_init_reset(idxd); + rc = idxd_device_init_reset(idxd); + if (rc < 0) + return rc; + dev_dbg(dev, "IDXD reset complete\n"); if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM)) { diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 593a2f6ed16c..a60ca11a5784 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -111,19 +111,14 @@ irqreturn_t idxd_irq_handler(int vec, void *data) return IRQ_WAKE_THREAD; } -irqreturn_t idxd_misc_thread(int vec, void *data) +static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) { - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = irq_entry->idxd; struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; - u32 cause, val = 0; + u32 val = 0; int i; bool err = false; - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause & IDXD_INTC_ERR) { spin_lock_bh(&idxd->dev_lock); for (i = 0; i < 4; i++) @@ -181,7 +176,7 @@ irqreturn_t idxd_misc_thread(int vec, void *data) val); if (!err) - goto out; + return 0; /* * This case should rarely happen and typically is due to software @@ -211,37 +206,58 @@ irqreturn_t idxd_misc_thread(int vec, void *data) gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 
"FLR" : "system reset"); spin_unlock_bh(&idxd->dev_lock); + return -ENXIO; } } - out: + return 0; +} + +irqreturn_t idxd_misc_thread(int vec, void *data) +{ + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = irq_entry->idxd; + int rc; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (cause) + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); + + while (cause) { + rc = process_misc_interrupts(idxd, cause); + if (rc < 0) + break; + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (cause) + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); + } + idxd_unmask_msix_vector(idxd, irq_entry->id); return IRQ_HANDLED; } -static bool process_fault(struct idxd_desc *desc, u64 fault_addr) +static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr) { /* * Completion address can be bad as well. Check fault address match for descriptor * and completion address. */ - if ((u64)desc->hw == fault_addr || - (u64)desc->completion == fault_addr) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_DEV_FAIL); + if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) { + struct idxd_device *idxd = desc->wq->idxd; + struct device *dev = &idxd->pdev->dev; + + dev_warn(dev, "desc with fault address: %#llx\n", fault_addr); return true; } return false; } -static bool complete_desc(struct idxd_desc *desc) +static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason) { - if (desc->completion->status) { - idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL); - return true; - } - - return false; + idxd_dma_complete_txd(desc, reason); + idxd_free_desc(desc->wq, desc); } static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, @@ -251,25 +267,25 @@ static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry, struct idxd_desc *desc, *t; struct llist_node *head; int queued = 0; - bool completed = false; unsigned long flags; + enum idxd_complete_type reason; 
*processed = 0; head = llist_del_all(&irq_entry->pending_llist); if (!head) goto out; - llist_for_each_entry_safe(desc, t, head, llnode) { - if (wtype == IRQ_WORK_NORMAL) - completed = complete_desc(desc); - else if (wtype == IRQ_WORK_PROCESS_FAULT) - completed = process_fault(desc, data); + if (wtype == IRQ_WORK_NORMAL) + reason = IDXD_COMPLETE_NORMAL; + else + reason = IDXD_COMPLETE_DEV_FAIL; - if (completed) { - idxd_free_desc(desc->wq, desc); + llist_for_each_entry_safe(desc, t, head, llnode) { + if (desc->completion->status) { + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + match_fault(desc, data); + complete_desc(desc, reason); (*processed)++; - if (wtype == IRQ_WORK_PROCESS_FAULT) - break; } else { spin_lock_irqsave(&irq_entry->list_lock, flags); list_add_tail(&desc->list, @@ -287,42 +303,46 @@ static int irq_process_work_list(struct idxd_irq_entry *irq_entry, enum irq_work_type wtype, int *processed, u64 data) { - struct list_head *node, *next; int queued = 0; - bool completed = false; unsigned long flags; + LIST_HEAD(flist); + struct idxd_desc *desc, *n; + enum idxd_complete_type reason; *processed = 0; - spin_lock_irqsave(&irq_entry->list_lock, flags); - if (list_empty(&irq_entry->work_list)) - goto out; - - list_for_each_safe(node, next, &irq_entry->work_list) { - struct idxd_desc *desc = - container_of(node, struct idxd_desc, list); + if (wtype == IRQ_WORK_NORMAL) + reason = IDXD_COMPLETE_NORMAL; + else + reason = IDXD_COMPLETE_DEV_FAIL; + /* + * This lock protects list corruption from access of list outside of the irq handler + * thread. 
+ */ + spin_lock_irqsave(&irq_entry->list_lock, flags); + if (list_empty(&irq_entry->work_list)) { spin_unlock_irqrestore(&irq_entry->list_lock, flags); - if (wtype == IRQ_WORK_NORMAL) - completed = complete_desc(desc); - else if (wtype == IRQ_WORK_PROCESS_FAULT) - completed = process_fault(desc, data); + return 0; + } - if (completed) { - spin_lock_irqsave(&irq_entry->list_lock, flags); + list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) { + if (desc->completion->status) { list_del(&desc->list); - spin_unlock_irqrestore(&irq_entry->list_lock, flags); - idxd_free_desc(desc->wq, desc); (*processed)++; - if (wtype == IRQ_WORK_PROCESS_FAULT) - return queued; + list_add_tail(&desc->list, &flist); } else { queued++; } - spin_lock_irqsave(&irq_entry->list_lock, flags); } - out: spin_unlock_irqrestore(&irq_entry->list_lock, flags); + + list_for_each_entry(desc, &flist, list) { + if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS) + match_fault(desc, data); + complete_desc(desc, reason); + } + return queued; } diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 298460438bb4..f474a1232335 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -2401,7 +2401,8 @@ static int bcdma_alloc_chan_resources(struct dma_chan *chan) dev_err(ud->ddev.dev, "Descriptor pool allocation failed\n"); uc->use_dma_pool = false; - return -ENOMEM; + ret = -ENOMEM; + goto err_res_free; } uc->use_dma_pool = true; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 1a7b51163528..1631727bf0da 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -776,6 +776,8 @@ static void edge_detector_stop(struct line *line) cancel_delayed_work_sync(&line->work); WRITE_ONCE(line->sw_debounced, 0); WRITE_ONCE(line->eflags, 0); + if (line->desc) + WRITE_ONCE(line->desc->debounce_period_us, 0); /* do not change line->level - see comment in debounced_value() */ } diff --git a/drivers/gpio/gpiolib.c 
b/drivers/gpio/gpiolib.c index b78a634cca24..97eec8d8dbdc 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -603,7 +603,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gdev->id; goto err_free_gdev; } - dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); + + ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); + if (ret) + goto err_free_ida; + device_initialize(&gdev->dev); dev_set_drvdata(&gdev->dev, gdev); if (gc->parent && gc->parent->driver) @@ -617,7 +621,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); if (!gdev->descs) { ret = -ENOMEM; - goto err_free_ida; + goto err_free_dev_name; } if (gc->ngpio == 0) { @@ -768,6 +772,8 @@ err_free_label: kfree_const(gdev->label); err_free_descs: kfree(gdev->descs); +err_free_dev_name: + kfree(dev_name(&gdev->dev)); err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: @@ -2551,7 +2557,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, struct gpio_chip *gc = desc_array[i]->gdev->chip; unsigned long fastpath[2 * BITS_TO_LONGS(FASTPATH_NGPIO)]; unsigned long *mask, *bits; - int first, j, ret; + int first, j; if (likely(gc->ngpio <= FASTPATH_NGPIO)) { mask = fastpath; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2d991da2cead..d1ed4f8df2b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -26,6 +26,7 @@ #include <linux/sched/task.h> #include "amdgpu_object.h" +#include "amdgpu_gem.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" @@ -1152,7 +1153,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; - struct amdgpu_bo_param bp; + struct drm_gem_object *gobj; u32 domain, alloc_domain; u64 alloc_flags; int ret; @@ 
-1220,19 +1221,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", va, size, domain_string(alloc_domain)); - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = 1; - bp.domain = alloc_domain; - bp.flags = alloc_flags; - bp.type = bo_type; - bp.resv = NULL; - ret = amdgpu_bo_create(adev, &bp, &bo); + ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags, + bo_type, NULL, &gobj); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", - domain_string(alloc_domain), ret); + domain_string(alloc_domain), ret); goto err_bo_create; } + bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; bo->tbo.ttm->sg = sg; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index f764803c53a4..48cb33e5b382 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -926,8 +926,10 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_gem_object *obj; struct amdgpu_framebuffer *amdgpu_fb; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + uint32_t domains; int ret; obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); @@ -938,7 +940,9 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, } /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ - if (obj->import_attach) { + bo = gem_to_amdgpu_bo(obj); + domains = amdgpu_display_supported_domains(drm_to_adev(dev), bo->flags); + if (obj->import_attach && !(domains & AMDGPU_GEM_DOMAIN_GTT)) { drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d0a1fee1f5f6..174a73eb23f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -269,8 +269,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, resv = vm->root.base.bo->tbo.base.resv; } -retry: initial_domain = (u32)(0xffffffff & args->in.domains); +retry: r = amdgpu_gem_object_create(adev, size, args->in.alignment, initial_domain, flags, ttm_bo_type_device, resv, &gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25ec4d57333f..b4c8e5d5c763 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -897,7 +897,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count) { + if (bo->prime_shared_count || bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) domain = AMDGPU_GEM_DOMAIN_GTT; else diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 346963e3cf73..d86b42a36560 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,10 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmCGTS_TCC_DISABLE_Vangogh 0x5006 +#define mmCGTS_TCC_DISABLE_Vangogh_BASE_IDX 1 +#define mmCGTS_USER_TCC_DISABLE_Vangogh 0x5007 +#define mmCGTS_USER_TCC_DISABLE_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 @@ -4936,8 +4940,18 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) { /* TCCs are global (not instanced). 
*/ - uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | - RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + uint32_t tcc_disable; + + switch (adev->asic_type) { + case CHIP_VANGOGH: + tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_Vangogh) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_Vangogh); + break; + default: + tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + break; + } adev->gfx.config.tcc_disabled_mask = REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c6da89df055d..961abf1cf040 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1833,8 +1833,8 @@ static void emulated_link_detect(struct dc_link *link) link->type = dc_connection_none; prev_sink = link->local_sink; - if (prev_sink != NULL) - dc_sink_retain(prev_sink); + if (prev_sink) + dc_sink_release(prev_sink); switch (link->connector_signal) { case SIGNAL_TYPE_HDMI_TYPE_A: { @@ -1934,7 +1934,7 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, dc_commit_updates_for_stream( dm->dc, bundle->surface_updates, dc_state->stream_status->plane_count, - dc_state->streams[k], &bundle->stream_update, dc_state); + dc_state->streams[k], &bundle->stream_update); } cleanup: @@ -1965,8 +1965,7 @@ static void dm_set_dpms_off(struct dc_link *link) stream_update.stream = stream_state; dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0, - stream_state, &stream_update, - stream_state->ctx->dc->current_state); + stream_state, &stream_update); mutex_unlock(&adev->dm.dc_lock); } @@ -2330,8 +2329,10 @@ void amdgpu_dm_update_connector_after_detect( * TODO: check if we still need the S3 mode update workaround. * If yes, put it here. 
*/ - if (aconnector->dc_sink) + if (aconnector->dc_sink) { amdgpu_dm_update_freesync_caps(connector, NULL); + dc_sink_release(aconnector->dc_sink); + } aconnector->dc_sink = sink; dc_sink_retain(aconnector->dc_sink); @@ -2347,8 +2348,6 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - drm_add_edid_modes(connector, aconnector->edid); - if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, aconnector->edid); @@ -7549,7 +7548,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc *pcrtc, bool wait_for_vblank) { - uint32_t i; + int i; uint64_t timestamp_ns; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; @@ -7590,7 +7589,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, amdgpu_dm_commit_cursors(state); /* update planes when needed */ - for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { + for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state; struct drm_framebuffer *fb = new_plane_state->fb; @@ -7813,8 +7812,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates, planes_count, acrtc_state->stream, - &bundle->stream_update, - dc_state); + &bundle->stream_update); /** * Enable or disable the interrupts on the backend. 
@@ -8150,13 +8148,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct dc_surface_update dummy_updates[MAX_SURFACES]; + struct dc_surface_update surface_updates[MAX_SURFACES]; struct dc_stream_update stream_update; struct dc_info_packet hdr_packet; struct dc_stream_status *status = NULL; bool abm_changed, hdr_changed, scaling_changed; - memset(&dummy_updates, 0, sizeof(dummy_updates)); + memset(&surface_updates, 0, sizeof(surface_updates)); memset(&stream_update, 0, sizeof(stream_update)); if (acrtc) { @@ -8213,16 +8211,15 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * To fix this, DC should permit updating only stream properties. */ for (j = 0; j < status->plane_count; j++) - dummy_updates[j].surface = status->plane_states[0]; + surface_updates[j].surface = status->plane_states[j]; mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream(dm->dc, - dummy_updates, + surface_updates, status->plane_count, dm_new_crtc_state->stream, - &stream_update, - dc_state); + &stream_update); mutex_unlock(&dm->dc_lock); } @@ -8359,14 +8356,14 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(conn_state); if (ret) - goto err; + goto out; /* Attach crtc to drm_atomic_state*/ crtc_state = drm_atomic_get_crtc_state(state, &disconnected_acrtc->base); ret = PTR_ERR_OR_ZERO(crtc_state); if (ret) - goto err; + goto out; /* force a restore */ crtc_state->mode_changed = true; @@ -8376,17 +8373,15 @@ static int dm_force_atomic_commit(struct drm_connector *connector) ret = PTR_ERR_OR_ZERO(plane_state); if (ret) - goto err; - + goto out; /* Call commit internally with the state we just constructed */ ret = drm_atomic_commit(state); - if (!ret) - return 
0; -err: - DRM_ERROR("Restoring old state failed with %i\n", ret); +out: drm_atomic_state_put(state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); return ret; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 8ab0b9060d2b..f2d8cf34be46 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -833,6 +833,9 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (computed_streams[i]) continue; + if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) + return false; + mutex_lock(&aconnector->mst_mgr.lock); if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link)) { mutex_unlock(&aconnector->mst_mgr.lock); @@ -850,7 +853,8 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, stream = dc_state->streams[i]; if (stream->timing.flags.DSC == 1) - dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream); + if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) + return false; } return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 58eb0d69873a..6cf1a5a2a5ec 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2679,8 +2679,7 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, - struct dc_stream_update *stream_update, - struct dc_state *state) + struct dc_stream_update *stream_update) { const struct dc_stream_status *stream_status; enum surface_update_type update_type; @@ -2699,6 +2698,12 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { + struct dc_plane_state *new_planes[MAX_SURFACES]; + + memset(new_planes, 0, 
sizeof(new_planes)); + + for (i = 0; i < surface_count; i++) + new_planes[i] = srf_updates[i].surface; /* initialize scratch memory for building context */ context = dc_create_state(dc); @@ -2707,15 +2712,21 @@ void dc_commit_updates_for_stream(struct dc *dc, return; } - dc_resource_state_copy_construct(state, context); + dc_resource_state_copy_construct( + dc->current_state, context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + /*remove old surfaces from context */ + if (!dc_rem_all_planes_for_stream(dc, stream, context)) { + DC_ERROR("Failed to remove streams for new validate context!\n"); + return; + } - if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) - new_pipe->plane_state->force_full_update = true; + /* add surface to context */ + if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { + DC_ERROR("Failed to add streams for new validate context!\n"); + return; } + } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index f95bade59624..1e4794e2825c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -892,13 +892,13 @@ static uint32_t translate_training_aux_read_interval(uint32_t dpcd_aux_read_inte switch (dpcd_aux_read_interval) { case 0x01: - aux_rd_interval_us = 400; + aux_rd_interval_us = 4000; break; case 0x02: - aux_rd_interval_us = 4000; + aux_rd_interval_us = 8000; break; case 0x03: - aux_rd_interval_us = 8000; + aux_rd_interval_us = 12000; break; case 0x04: aux_rd_interval_us = 16000; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index b7910976b81a..e243c01b9672 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -283,8 
+283,7 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, - struct dc_stream_update *stream_update, - struct dc_state *state); + struct dc_stream_update *stream_update); /* * Log the current stream state. */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index b000b43a820d..674376428916 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -906,6 +906,8 @@ enum dcn20_clk_src_array_id { DCN20_CLK_SRC_PLL0, DCN20_CLK_SRC_PLL1, DCN20_CLK_SRC_PLL2, + DCN20_CLK_SRC_PLL3, + DCN20_CLK_SRC_PLL4, DCN20_CLK_SRC_TOTAL_DCN21 }; @@ -2030,6 +2032,14 @@ static bool dcn21_resource_construct( dcn21_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL3] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs[3], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL4] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL4, + &clk_src_regs[4], false); pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 5c1482d4ca43..92ad2cdbae10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -591,14 +591,17 @@ static ssize_t vangogh_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_socket_power = metrics.CurrentSocketPower; gpu_metrics->average_cpu_power = metrics.Power[0]; gpu_metrics->average_soc_power = metrics.Power[1]; + gpu_metrics->average_gfx_power = metrics.Power[2]; memcpy(&gpu_metrics->average_core_power[0], &metrics.CorePower[0], sizeof(uint16_t) * 8); gpu_metrics->average_gfxclk_frequency = 
metrics.GfxclkFrequency; gpu_metrics->average_socclk_frequency = metrics.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.MemclkFrequency; gpu_metrics->average_fclk_frequency = metrics.MemclkFrequency; gpu_metrics->average_vclk_frequency = metrics.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.DclkFrequency; memcpy(&gpu_metrics->current_coreclk[0], &metrics.CoreFrequency[0], diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 0c98d27f84ac..fee27952ec6d 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -14,6 +14,7 @@ #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/wait.h> +#include <linux/workqueue.h> #include <sound/hdmi-codec.h> @@ -36,6 +37,7 @@ struct lt9611uxc { struct mutex ocm_lock; struct wait_queue_head wq; + struct work_struct work; struct device_node *dsi0_node; struct device_node *dsi1_node; @@ -52,6 +54,8 @@ struct lt9611uxc { bool hpd_supported; bool edid_read; + /* can be accessed from different threads, so protect this with ocm_lock */ + bool hdmi_connected; uint8_t fw_version; }; @@ -143,21 +147,41 @@ static irqreturn_t lt9611uxc_irq_thread_handler(int irq, void *dev_id) if (irq_status) regmap_write(lt9611uxc->regmap, 0xb022, 0); - lt9611uxc_unlock(lt9611uxc); - - if (irq_status & BIT(0)) + if (irq_status & BIT(0)) { lt9611uxc->edid_read = !!(hpd_status & BIT(0)); + wake_up_all(<9611uxc->wq); + } if (irq_status & BIT(1)) { - if (lt9611uxc->connector.dev) - drm_kms_helper_hotplug_event(lt9611uxc->connector.dev); - else - drm_bridge_hpd_notify(<9611uxc->bridge, !!(hpd_status & BIT(1))); + lt9611uxc->hdmi_connected = hpd_status & BIT(1); + schedule_work(<9611uxc->work); } + lt9611uxc_unlock(lt9611uxc); + return IRQ_HANDLED; } +static void lt9611uxc_hpd_work(struct work_struct *work) +{ + struct lt9611uxc *lt9611uxc = container_of(work, struct lt9611uxc, work); + bool connected; 
+ + if (lt9611uxc->connector.dev) + drm_kms_helper_hotplug_event(lt9611uxc->connector.dev); + else { + + mutex_lock(<9611uxc->ocm_lock); + connected = lt9611uxc->hdmi_connected; + mutex_unlock(<9611uxc->ocm_lock); + + drm_bridge_hpd_notify(<9611uxc->bridge, + connected ? + connector_status_connected : + connector_status_disconnected); + } +} + static void lt9611uxc_reset(struct lt9611uxc *lt9611uxc) { gpiod_set_value_cansleep(lt9611uxc->reset_gpio, 1); @@ -445,18 +469,21 @@ static enum drm_connector_status lt9611uxc_bridge_detect(struct drm_bridge *brid struct lt9611uxc *lt9611uxc = bridge_to_lt9611uxc(bridge); unsigned int reg_val = 0; int ret; - int connected = 1; + bool connected = true; + + lt9611uxc_lock(lt9611uxc); if (lt9611uxc->hpd_supported) { - lt9611uxc_lock(lt9611uxc); ret = regmap_read(lt9611uxc->regmap, 0xb023, ®_val); - lt9611uxc_unlock(lt9611uxc); if (ret) dev_err(lt9611uxc->dev, "failed to read hpd status: %d\n", ret); else connected = reg_val & BIT(1); } + lt9611uxc->hdmi_connected = connected; + + lt9611uxc_unlock(lt9611uxc); return connected ? 
connector_status_connected : connector_status_disconnected; @@ -465,7 +492,7 @@ static enum drm_connector_status lt9611uxc_bridge_detect(struct drm_bridge *brid static int lt9611uxc_wait_for_edid(struct lt9611uxc *lt9611uxc) { return wait_event_interruptible_timeout(lt9611uxc->wq, lt9611uxc->edid_read, - msecs_to_jiffies(100)); + msecs_to_jiffies(500)); } static int lt9611uxc_get_edid_block(void *data, u8 *buf, unsigned int block, size_t len) @@ -503,7 +530,10 @@ static struct edid *lt9611uxc_bridge_get_edid(struct drm_bridge *bridge, ret = lt9611uxc_wait_for_edid(lt9611uxc); if (ret < 0) { dev_err(lt9611uxc->dev, "wait for EDID failed: %d\n", ret); - return ERR_PTR(ret); + return NULL; + } else if (ret == 0) { + dev_err(lt9611uxc->dev, "wait for EDID timeout\n"); + return NULL; } return drm_do_get_edid(connector, lt9611uxc_get_edid_block, lt9611uxc); @@ -926,6 +956,8 @@ retry: lt9611uxc->fw_version = ret; init_waitqueue_head(<9611uxc->wq); + INIT_WORK(<9611uxc->work, lt9611uxc_hpd_work); + ret = devm_request_threaded_irq(dev, client->irq, NULL, lt9611uxc_irq_thread_handler, IRQF_ONESHOT, "lt9611uxc", lt9611uxc); @@ -962,6 +994,7 @@ static int lt9611uxc_remove(struct i2c_client *client) struct lt9611uxc *lt9611uxc = i2c_get_clientdata(client); disable_irq(client->irq); + flush_scheduled_work(); lt9611uxc_audio_exit(lt9611uxc); drm_bridge_remove(<9611uxc->bridge); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 0401b2f47500..8781deefeae3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -3629,14 +3629,26 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, return 0; } -static int drm_dp_get_vc_payload_bw(u8 dp_link_bw, u8 dp_link_count) +/** + * drm_dp_get_vc_payload_bw - get the VC payload BW for an MST link + * @link_rate: link rate in 10kbits/s units + * @link_lane_count: lane count + * + * Calculate the total bandwidth of a MultiStream 
Transport link. The returned + * value is in units of PBNs/(timeslots/1 MTP). This value can be used to + * convert the number of PBNs required for a given stream to the number of + * timeslots this stream requires in each MTP. + */ +int drm_dp_get_vc_payload_bw(int link_rate, int link_lane_count) { - if (dp_link_bw == 0 || dp_link_count == 0) - DRM_DEBUG_KMS("invalid link bandwidth in DPCD: %x (link count: %d)\n", - dp_link_bw, dp_link_count); + if (link_rate == 0 || link_lane_count == 0) + DRM_DEBUG_KMS("invalid link rate/lane count: (%d / %d)\n", + link_rate, link_lane_count); - return dp_link_bw * dp_link_count / 2; + /* See DP v2.0 2.6.4.2, VCPayload_Bandwidth_for_OneTimeSlotPer_MTP_Allocation */ + return link_rate * link_lane_count / 54000; } +EXPORT_SYMBOL(drm_dp_get_vc_payload_bw); /** * drm_dp_read_mst_cap() - check whether or not a sink supports MST @@ -3692,7 +3704,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms goto out_unlock; } - mgr->pbn_div = drm_dp_get_vc_payload_bw(mgr->dpcd[1], + mgr->pbn_div = drm_dp_get_vc_payload_bw(drm_dp_bw_code_to_link_rate(mgr->dpcd[1]), mgr->dpcd[2] & DP_MAX_LANE_COUNT_MASK); if (mgr->pbn_div == 0) { ret = -EINVAL; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index bf17365857ca..dc13d1814d95 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2754,6 +2754,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int n_entries, ln; u32 val; + if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) + return; + ddi_translations = icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); if (level >= n_entries) { drm_dbg_kms(&dev_priv->drm, @@ -2890,6 +2893,9 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 val, dpcnt_mask, dpcnt_val; int n_entries, ln; + if (enc_to_dig_port(encoder)->tc_mode == TC_PORT_TBT_ALT) + return; + ddi_translations = 
tgl_get_dkl_buf_trans(encoder, crtc_state, &n_entries); if (level >= n_entries) @@ -3531,6 +3537,23 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, intel_de_posting_read(dev_priv, dp_tp_ctl_reg(encoder, crtc_state)); } +static void intel_ddi_power_up_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); + enum phy phy = intel_port_to_phy(i915, encoder->port); + + if (intel_phy_is_combo(i915, phy)) { + bool lane_reversal = + dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + + intel_combo_phy_power_up_lanes(i915, phy, false, + crtc_state->lane_count, + lane_reversal); + } +} + static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -3620,14 +3643,7 @@ static void tgl_ddi_pre_enable_dp(struct intel_atomic_state *state, * 7.f Combo PHY: Configure PORT_CL_DW10 Static Power Down to power up * the used lanes of the DDI. 
*/ - if (intel_phy_is_combo(dev_priv, phy)) { - bool lane_reversal = - dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; - - intel_combo_phy_power_up_lanes(dev_priv, phy, false, - crtc_state->lane_count, - lane_reversal); - } + intel_ddi_power_up_lanes(encoder, crtc_state); /* * 7.g Configure and enable DDI_BUF_CTL @@ -3712,14 +3728,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, else intel_prepare_dp_ddi_buffers(encoder, crtc_state); - if (intel_phy_is_combo(dev_priv, phy)) { - bool lane_reversal = - dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; - - intel_combo_phy_power_up_lanes(dev_priv, phy, false, - crtc_state->lane_count, - lane_reversal); - } + intel_ddi_power_up_lanes(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder, crtc_state); if (!is_mst) @@ -4205,6 +4214,8 @@ static void intel_enable_ddi_hdmi(struct intel_atomic_state *state, intel_de_write(dev_priv, reg, val); } + intel_ddi_power_up_lanes(encoder, crtc_state); + /* In HDMI/DVI mode, the port width, and swing/emphasis values * are ignored so nothing special needs to be done besides * enabling the port. 
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 53a00cf3fa32..61be6bed9162 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2309,7 +2309,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ ret = i915_vma_pin_fence(vma); if (ret != 0 && INTEL_GEN(dev_priv) < 4) { - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); vma = ERR_PTR(ret); goto err; } @@ -2327,12 +2327,9 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - i915_gem_object_lock(vma->obj, NULL); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); - i915_gem_object_unpin_from_display_plane(vma); - i915_gem_object_unlock(vma->obj); - + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -4807,6 +4804,8 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } else if (fb->format->is_yuv) { plane_color_ctl |= PLANE_COLOR_INPUT_CSC_ENABLE; + if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) + plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } return plane_color_ctl; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 09123e8625c4..8a26307c4896 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4637,24 +4637,6 @@ ivb_cpu_edp_set_signal_levels(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, intel_dp->output_reg); } -void intel_dp_set_signal_levels(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - u8 train_set = intel_dp->train_set[0]; - - drm_dbg_kms(&dev_priv->drm, "Using vswing level %d%s\n", - train_set & DP_TRAIN_VOLTAGE_SWING_MASK, - train_set & DP_TRAIN_MAX_SWING_REACHED ? 
" (max)" : ""); - drm_dbg_kms(&dev_priv->drm, "Using pre-emphasis level %d%s\n", - (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) >> - DP_TRAIN_PRE_EMPHASIS_SHIFT, - train_set & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED ? - " (max)" : ""); - - intel_dp->set_signal_levels(intel_dp, crtc_state); -} - void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, @@ -5703,7 +5685,7 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp, intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state); - intel_dp_set_signal_levels(intel_dp, crtc_state); + intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX); intel_dp_phy_pattern_update(intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 05f7ddf7a795..6620f9efdcbb 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -96,9 +96,6 @@ void intel_dp_program_link_training_pattern(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, u8 dp_train_pat); -void -intel_dp_set_signal_levels(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, u8 *link_bw, u8 *rate_select); bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 91d3979902d0..d8c6d7054d11 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -334,6 +334,27 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, return drm_dp_dpcd_write(&intel_dp->aux, reg, buf, len) == len; } +void intel_dp_set_signal_levels(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + enum drm_dp_phy dp_phy) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u8 train_set = 
intel_dp->train_set[0]; + char phy_name[10]; + + drm_dbg_kms(&dev_priv->drm, "Using vswing level %d%s, pre-emphasis level %d%s, at %s\n", + train_set & DP_TRAIN_VOLTAGE_SWING_MASK, + train_set & DP_TRAIN_MAX_SWING_REACHED ? " (max)" : "", + (train_set & DP_TRAIN_PRE_EMPHASIS_MASK) >> + DP_TRAIN_PRE_EMPHASIS_SHIFT, + train_set & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED ? + " (max)" : "", + intel_dp_phy_name(dp_phy, phy_name, sizeof(phy_name))); + + if (intel_dp_phy_is_downstream_of_source(intel_dp, dp_phy)) + intel_dp->set_signal_levels(intel_dp, crtc_state); +} + static bool intel_dp_reset_link_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, @@ -341,7 +362,7 @@ intel_dp_reset_link_train(struct intel_dp *intel_dp, u8 dp_train_pat) { memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set)); - intel_dp_set_signal_levels(intel_dp, crtc_state); + intel_dp_set_signal_levels(intel_dp, crtc_state, dp_phy); return intel_dp_set_link_train(intel_dp, crtc_state, dp_phy, dp_train_pat); } @@ -355,7 +376,7 @@ intel_dp_update_link_train(struct intel_dp *intel_dp, DP_TRAINING_LANE0_SET_PHY_REPEATER(dp_phy); int ret; - intel_dp_set_signal_levels(intel_dp, crtc_state); + intel_dp_set_signal_levels(intel_dp, crtc_state, dp_phy); ret = drm_dp_dpcd_write(&intel_dp->aux, reg, intel_dp->train_set, crtc_state->lane_count); diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h index 86905aa24db7..6a1f76bd8c75 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h @@ -17,6 +17,9 @@ void intel_dp_get_adjust_train(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, enum drm_dp_phy dp_phy, const u8 link_status[DP_LINK_STATUS_SIZE]); +void intel_dp_set_signal_levels(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + enum drm_dp_phy dp_phy); void intel_dp_start_link_train(struct intel_dp 
*intel_dp, const struct intel_crtc_state *crtc_state); void intel_dp_stop_link_train(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 27f04aed8764..3286b232be0b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -69,7 +69,9 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, connector->port, - crtc_state->pbn, 0); + crtc_state->pbn, + drm_dp_get_vc_payload_bw(crtc_state->port_clock, + crtc_state->lane_count)); if (slots == -EDEADLK) return slots; if (slots >= 0) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 52b4f6193b4c..0095c8cac9b4 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -359,7 +359,7 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) intel_frontbuffer_flip_complete(overlay->i915, INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); i915_vma_put(vma); } @@ -860,7 +860,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, return 0; out_unpin: - i915_gem_object_unpin_from_display_plane(vma); + i915_vma_unpin(vma); out_pin_section: atomic_dec(&dev_priv->gpu_error.pending_fb_pin); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index 019a2d6d807a..3da2544fa1c0 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -618,13 +618,19 @@ skl_program_scaler(struct intel_plane *plane, /* Preoffset values for YUV to RGB Conversion */ #define PREOFF_YUV_TO_RGB_HI 0x1800 -#define PREOFF_YUV_TO_RGB_ME 0x1F00 +#define PREOFF_YUV_TO_RGB_ME 0x0000 #define PREOFF_YUV_TO_RGB_LO 0x1800 #define 
ROFF(x) (((x) & 0xffff) << 16) #define GOFF(x) (((x) & 0xffff) << 0) #define BOFF(x) (((x) & 0xffff) << 16) +/* + * Programs the input color space conversion stage for ICL HDR planes. + * Note that it is assumed that this stage always happens after YUV + * range correction. Thus, the input to this stage is assumed to be + * in full-range YCbCr. + */ static void icl_program_input_csc(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, @@ -672,52 +678,7 @@ icl_program_input_csc(struct intel_plane *plane, 0x0, 0x7800, 0x7F10, }, }; - - /* Matrix for Limited Range to Full Range Conversion */ - static const u16 input_csc_matrix_lr[][9] = { - /* - * BT.601 Limted range YCbCr -> full range RGB - * The matrix required is : - * [1.164384, 0.000, 1.596027, - * 1.164384, -0.39175, -0.812813, - * 1.164384, 2.017232, 0.0000] - */ - [DRM_COLOR_YCBCR_BT601] = { - 0x7CC8, 0x7950, 0x0, - 0x8D00, 0x7950, 0x9C88, - 0x0, 0x7950, 0x6810, - }, - /* - * BT.709 Limited range YCbCr -> full range RGB - * The matrix required is : - * [1.164384, 0.000, 1.792741, - * 1.164384, -0.213249, -0.532909, - * 1.164384, 2.112402, 0.0000] - */ - [DRM_COLOR_YCBCR_BT709] = { - 0x7E58, 0x7950, 0x0, - 0x8888, 0x7950, 0xADA8, - 0x0, 0x7950, 0x6870, - }, - /* - * BT.2020 Limited range YCbCr -> full range RGB - * The matrix required is : - * [1.164, 0.000, 1.678, - * 1.164, -0.1873, -0.6504, - * 1.164, 2.1417, 0.0000] - */ - [DRM_COLOR_YCBCR_BT2020] = { - 0x7D70, 0x7950, 0x0, - 0x8A68, 0x7950, 0xAC00, - 0x0, 0x7950, 0x6890, - }, - }; - const u16 *csc; - - if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) - csc = input_csc_matrix[plane_state->hw.color_encoding]; - else - csc = input_csc_matrix_lr[plane_state->hw.color_encoding]; + const u16 *csc = input_csc_matrix[plane_state->hw.color_encoding]; intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_COEFF(pipe, plane_id, 0), ROFF(csc[0]) | GOFF(csc[1])); @@ -734,14 +695,8 @@ icl_program_input_csc(struct intel_plane *plane, 
intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 0), PREOFF_YUV_TO_RGB_HI); - if (plane_state->hw.color_range == DRM_COLOR_YCBCR_FULL_RANGE) - intel_de_write_fw(dev_priv, - PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), - 0); - else - intel_de_write_fw(dev_priv, - PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), - PREOFF_YUV_TO_RGB_ME); + intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 1), + PREOFF_YUV_TO_RGB_ME); intel_de_write_fw(dev_priv, PLANE_INPUT_CSC_PREOFF(pipe, plane_id, 2), PREOFF_YUV_TO_RGB_LO); intel_de_write_fw(dev_priv, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index fcce6909f201..3d435bfff764 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -387,48 +387,6 @@ err: return vma; } -static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_vma *vma; - - if (list_empty(&obj->vma.list)) - return; - - mutex_lock(&i915->ggtt.vm.mutex); - spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) { - if (!drm_mm_node_allocated(&vma->node)) - continue; - - GEM_BUG_ON(vma->vm != &i915->ggtt.vm); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); - } - spin_unlock(&obj->vma.lock); - mutex_unlock(&i915->ggtt.vm.mutex); - - if (i915_gem_object_is_shrinkable(obj)) { - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - - if (obj->mm.madv == I915_MADV_WILLNEED && - !atomic_read(&obj->mm.shrink_pin)) - list_move_tail(&obj->mm.link, &i915->mm.shrink_list); - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - } -} - -void -i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) -{ - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(vma->obj); - - i915_vma_unpin(vma); -} - /** * Moves a single object to the CPU read, and possibly write domain. 
* @obj: object to act on @@ -569,9 +527,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else err = i915_gem_object_set_to_cpu_domain(obj, write_domain); - /* And bump the LRU for this access */ - i915_gem_object_bump_inactive_ggtt(obj); - i915_gem_object_unlock(obj); if (write_domain) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index be14486f63a7..4556afe18f16 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -486,7 +486,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view, unsigned int flags); -void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj); void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 0625cbb3b431..1d1757584f49 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -187,18 +187,6 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(b->irq_engine, tl); } -static bool __signal_request(struct i915_request *rq) -{ - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - - if (!__dma_fence_signal(&rq->fence)) { - i915_request_put(rq); - return false; - } - - return true; -} - static struct llist_node * slist_add(struct llist_node *node, struct llist_node *head) { @@ -269,9 +257,11 @@ static void signal_irq_work(struct irq_work *work) release = remove_signaling_context(b, ce); spin_unlock(&ce->signal_lock); - if (__signal_request(rq)) + if (__dma_fence_signal(&rq->fence)) /* We own signal_node now, xfer to local list */ signal = slist_add(&rq->signal_node, signal); + else + i915_request_put(rq); if (release) { add_retire(b, 
ce->timeline); @@ -358,6 +348,17 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b) kfree(b); } +static void irq_signal_request(struct i915_request *rq, + struct intel_breadcrumbs *b) +{ + if (!__dma_fence_signal(&rq->fence)) + return; + + i915_request_get(rq); + if (llist_add(&rq->signal_node, &b->signaled_requests)) + irq_work_queue(&b->irq_work); +} + static void insert_breadcrumb(struct i915_request *rq) { struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; @@ -367,17 +368,13 @@ static void insert_breadcrumb(struct i915_request *rq) if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return; - i915_request_get(rq); - /* * If the request is already completed, we can transfer it * straight onto a signaled list, and queue the irq worker for * its signal completion. */ if (__i915_request_is_complete(rq)) { - if (__signal_request(rq) && - llist_add(&rq->signal_node, &b->signaled_requests)) - irq_work_queue(&b->irq_work); + irq_signal_request(rq, b); return; } @@ -408,6 +405,8 @@ static void insert_breadcrumb(struct i915_request *rq) break; } } + + i915_request_get(rq); list_add_rcu(&rq->signal_link, pos); GEM_BUG_ON(!check_signal_order(ce, rq)); GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); @@ -448,19 +447,25 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) void i915_request_cancel_breadcrumb(struct i915_request *rq) { + struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; struct intel_context *ce = rq->context; bool release; - if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) + spin_lock(&ce->signal_lock); + if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) { + spin_unlock(&ce->signal_lock); return; + } - spin_lock(&ce->signal_lock); list_del_rcu(&rq->signal_link); - release = remove_signaling_context(rq->engine->breadcrumbs, ce); + release = remove_signaling_context(b, ce); spin_unlock(&ce->signal_lock); if (release) intel_context_put(ce); + if 
(__i915_request_is_complete(rq)) + irq_signal_request(rq, b); + i915_request_put(rq); } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c85b1af06b7b..7ea367a5444d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -547,7 +547,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm; - int i; + int i, j; if (!ttm_dma) return; @@ -556,10 +556,21 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; i++) + for (i = 0; i < ttm_dma->num_pages; ++i) { + struct page *p = ttm_dma->pages[i]; + size_t num_pages = 1; + + for (j = i + 1; j < ttm_dma->num_pages; ++j) { + if (++p != ttm_dma->pages[j]) + break; + + ++num_pages; + } dma_sync_single_for_device(drm->dev->dev, ttm_dma->dma_address[i], - PAGE_SIZE, DMA_TO_DEVICE); + num_pages * PAGE_SIZE, DMA_TO_DEVICE); + i += num_pages; + } } void @@ -567,7 +578,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm; - int i; + int i, j; if (!ttm_dma) return; @@ -576,9 +587,21 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; i++) + for (i = 0; i < ttm_dma->num_pages; ++i) { + struct page *p = ttm_dma->pages[i]; + size_t num_pages = 1; + + for (j = i + 1; j < ttm_dma->num_pages; ++j) { + if (++p != ttm_dma->pages[j]) + break; + + ++num_pages; + } + dma_sync_single_for_cpu(drm->dev->dev, ttm_dma->dma_address[i], - PAGE_SIZE, DMA_FROM_DEVICE); + num_pages * PAGE_SIZE, DMA_FROM_DEVICE); + i += num_pages; + } } void nouveau_bo_add_io_reserve_lru(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 11e0313db0ea..74bf1c84b637 
100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -84,7 +84,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, * put_page() on a TTM allocated page is illegal. */ if (order) - gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | + gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; if (!pool->use_dma_alloc) { diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c index 500abd27fb22..1b73647cc3b1 100644 --- a/drivers/i3c/master/mipi-i3c-hci/core.c +++ b/drivers/i3c/master/mipi-i3c-hci/core.c @@ -777,7 +777,7 @@ static int i3c_hci_remove(struct platform_device *pdev) return 0; } -static const struct __maybe_unused of_device_id i3c_hci_of_match[] = { +static const __maybe_unused struct of_device_id i3c_hci_of_match[] = { { .compatible = "mipi-i3c-hci", }, {}, }; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 0687f0ed60b8..8cc8ca4a9ac0 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -215,9 +215,17 @@ static const struct xpad_device { { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, - { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02ab, 
"PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -296,6 +304,9 @@ static const struct xpad_device { { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, + { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, + { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, + { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, @@ -429,8 +440,12 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */ + XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */ + XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA Controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA Controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA Controllers */ + XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke X-Box One pad */ + XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ { } }; diff --git a/drivers/input/misc/ariel-pwrbutton.c b/drivers/input/misc/ariel-pwrbutton.c index eda86ab552b9..17bbaac8b80c 100644 --- a/drivers/input/misc/ariel-pwrbutton.c +++ b/drivers/input/misc/ariel-pwrbutton.c @@ 
-149,12 +149,6 @@ static const struct of_device_id ariel_pwrbutton_of_match[] = { }; MODULE_DEVICE_TABLE(of, ariel_pwrbutton_of_match); -static const struct spi_device_id ariel_pwrbutton_id_table[] = { - { "wyse-ariel-ec-input", 0 }, - {} -}; -MODULE_DEVICE_TABLE(spi, ariel_pwrbutton_id_table); - static struct spi_driver ariel_pwrbutton_driver = { .driver = { .name = "dell-wyse-ariel-ec-input", diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 3a2dcf0805f1..c74b020796a9 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -219,6 +219,8 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "PEGATRON CORPORATION"), DMI_MATCH(DMI_PRODUCT_NAME, "C15B"), }, + }, + { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ByteSpeed LLC"), DMI_MATCH(DMI_PRODUCT_NAME, "ByteSpeed Laptop C15B"), diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 19765f1c04f7..c682b028f0a2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -157,6 +157,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "5663", .data = &gt1x_chip_data }, { .id = "5688", .data = &gt1x_chip_data }, { .id = "917S", .data = &gt1x_chip_data }, + { .id = "9286", .data = &gt1x_chip_data }, { .id = "911", .data = &gt911_chip_data }, { .id = "9271", .data = &gt911_chip_data }, @@ -1448,6 +1449,7 @@ static const struct of_device_id goodix_of_match[] = { { .compatible = "goodix,gt927" }, { .compatible = "goodix,gt9271" }, { .compatible = "goodix,gt928" }, + { .compatible = "goodix,gt9286" }, { .compatible = "goodix,gt967" }, { } }; diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 199cf3daec10..d8fccf048bf4 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -29,11 +29,13 @@ struct ili2xxx_chip { void *buf, size_t len); int
(*get_touch_data)(struct i2c_client *client, u8 *data); bool (*parse_touch_data)(const u8 *data, unsigned int finger, - unsigned int *x, unsigned int *y); + unsigned int *x, unsigned int *y, + unsigned int *z); bool (*continue_polling)(const u8 *data, bool touch); unsigned int max_touches; unsigned int resolution; bool has_calibrate_reg; + bool has_pressure_reg; }; struct ili210x { @@ -82,7 +84,8 @@ static int ili210x_read_touch_data(struct i2c_client *client, u8 *data) static bool ili210x_touchdata_to_coords(const u8 *touchdata, unsigned int finger, - unsigned int *x, unsigned int *y) + unsigned int *x, unsigned int *y, + unsigned int *z) { if (touchdata[0] & BIT(finger)) return false; @@ -137,7 +140,8 @@ static int ili211x_read_touch_data(struct i2c_client *client, u8 *data) static bool ili211x_touchdata_to_coords(const u8 *touchdata, unsigned int finger, - unsigned int *x, unsigned int *y) + unsigned int *x, unsigned int *y, + unsigned int *z) { u32 data; @@ -169,7 +173,8 @@ static const struct ili2xxx_chip ili211x_chip = { static bool ili212x_touchdata_to_coords(const u8 *touchdata, unsigned int finger, - unsigned int *x, unsigned int *y) + unsigned int *x, unsigned int *y, + unsigned int *z) { u16 val; @@ -235,7 +240,8 @@ static int ili251x_read_touch_data(struct i2c_client *client, u8 *data) static bool ili251x_touchdata_to_coords(const u8 *touchdata, unsigned int finger, - unsigned int *x, unsigned int *y) + unsigned int *x, unsigned int *y, + unsigned int *z) { u16 val; @@ -245,6 +251,7 @@ static bool ili251x_touchdata_to_coords(const u8 *touchdata, *x = val & 0x3fff; *y = get_unaligned_be16(touchdata + 1 + (finger * 5) + 2); + *z = touchdata[1 + (finger * 5) + 4]; return true; } @@ -261,6 +268,7 @@ static const struct ili2xxx_chip ili251x_chip = { .continue_polling = ili251x_check_continue_polling, .max_touches = 10, .has_calibrate_reg = true, + .has_pressure_reg = true, }; static bool ili210x_report_events(struct ili210x *priv, u8 *touchdata) @@ -268,14 
+276,16 @@ static bool ili210x_report_events(struct ili210x *priv, u8 *touchdata) struct input_dev *input = priv->input; int i; bool contact = false, touch; - unsigned int x = 0, y = 0; + unsigned int x = 0, y = 0, z = 0; for (i = 0; i < priv->chip->max_touches; i++) { - touch = priv->chip->parse_touch_data(touchdata, i, &x, &y); + touch = priv->chip->parse_touch_data(touchdata, i, &x, &y, &z); input_mt_slot(input, i); if (input_mt_report_slot_state(input, MT_TOOL_FINGER, touch)) { touchscreen_report_pos(input, &priv->prop, x, y, true); + if (priv->chip->has_pressure_reg) + input_report_abs(input, ABS_MT_PRESSURE, z); contact = true; } } @@ -437,6 +447,8 @@ static int ili210x_i2c_probe(struct i2c_client *client, max_xy = (chip->resolution ?: SZ_64K) - 1; input_set_abs_params(input, ABS_MT_POSITION_X, 0, max_xy, 0, 0); input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_xy, 0, 0); + if (priv->chip->has_pressure_reg) + input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xa, 0, 0); touchscreen_parse_properties(input, true, &priv->prop); error = input_mt_init_slots(input, priv->chip->max_touches, diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index bda96762744e..b4e7bcbe9b91 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -26,6 +26,20 @@ #define ST1232_TS_NAME "st1232-ts" #define ST1633_TS_NAME "st1633-ts" +#define REG_STATUS 0x01 /* Device Status | Error Code */ + +#define STATUS_NORMAL 0x00 +#define STATUS_INIT 0x01 +#define STATUS_ERROR 0x02 +#define STATUS_AUTO_TUNING 0x03 +#define STATUS_IDLE 0x04 +#define STATUS_POWER_DOWN 0x05 + +#define ERROR_NONE 0x00 +#define ERROR_INVALID_ADDRESS 0x10 +#define ERROR_INVALID_VALUE 0x20 +#define ERROR_INVALID_PLATFORM 0x30 + #define REG_XY_RESOLUTION 0x04 #define REG_XY_COORDINATES 0x12 #define ST_TS_MAX_FINGERS 10 @@ -47,7 +61,8 @@ struct st1232_ts_data { u8 *read_buf; }; -static int st1232_ts_read_data(struct st1232_ts_data *ts, u8 reg) +static 
int st1232_ts_read_data(struct st1232_ts_data *ts, u8 reg, + unsigned int n) { struct i2c_client *client = ts->client; struct i2c_msg msg[] = { @@ -59,7 +74,7 @@ static int st1232_ts_read_data(struct st1232_ts_data *ts, u8 reg) { .addr = client->addr, .flags = I2C_M_RD | I2C_M_DMA_SAFE, - .len = ts->read_buf_len, + .len = n, .buf = ts->read_buf, } }; @@ -72,6 +87,22 @@ static int st1232_ts_read_data(struct st1232_ts_data *ts, u8 reg) return 0; } +static int st1232_ts_wait_ready(struct st1232_ts_data *ts) +{ + unsigned int retries; + int error; + + for (retries = 10; retries; retries--) { + error = st1232_ts_read_data(ts, REG_STATUS, 1); + if (!error && ts->read_buf[0] == (STATUS_NORMAL | ERROR_NONE)) + return 0; + + usleep_range(1000, 2000); + } + + return -ENXIO; +} + static int st1232_ts_read_resolution(struct st1232_ts_data *ts, u16 *max_x, u16 *max_y) { @@ -79,14 +110,14 @@ static int st1232_ts_read_resolution(struct st1232_ts_data *ts, u16 *max_x, int error; /* select resolution register */ - error = st1232_ts_read_data(ts, REG_XY_RESOLUTION); + error = st1232_ts_read_data(ts, REG_XY_RESOLUTION, 3); if (error) return error; buf = ts->read_buf; - *max_x = ((buf[0] & 0x0070) << 4) | buf[1]; - *max_y = ((buf[0] & 0x0007) << 8) | buf[2]; + *max_x = (((buf[0] & 0x0070) << 4) | buf[1]) - 1; + *max_y = (((buf[0] & 0x0007) << 8) | buf[2]) - 1; return 0; } @@ -140,7 +171,7 @@ static irqreturn_t st1232_ts_irq_handler(int irq, void *dev_id) int count; int error; - error = st1232_ts_read_data(ts, REG_XY_COORDINATES); + error = st1232_ts_read_data(ts, REG_XY_COORDINATES, ts->read_buf_len); if (error) goto out; @@ -251,6 +282,11 @@ static int st1232_ts_probe(struct i2c_client *client, input_dev->name = "st1232-touchscreen"; input_dev->id.bustype = BUS_I2C; + /* Wait until device is ready */ + error = st1232_ts_wait_ready(ts); + if (error) + return error; + /* Read resolution from the chip */ error = st1232_ts_read_resolution(ts, &max_x, &max_y); if (error) { diff --git 
a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c index 44bea5e4aeda..b23773583179 100644 --- a/drivers/mmc/core/sdio_cis.c +++ b/drivers/mmc/core/sdio_cis.c @@ -20,6 +20,8 @@ #include "sdio_cis.h" #include "sdio_ops.h" +#define SDIO_READ_CIS_TIMEOUT_MS (10 * 1000) /* 10s */ + static int cistpl_vers_1(struct mmc_card *card, struct sdio_func *func, const unsigned char *buf, unsigned size) { @@ -274,6 +276,8 @@ static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func) do { unsigned char tpl_code, tpl_link; + unsigned long timeout = jiffies + + msecs_to_jiffies(SDIO_READ_CIS_TIMEOUT_MS); ret = mmc_io_rw_direct(card, 0, 0, ptr++, 0, &tpl_code); if (ret) @@ -326,6 +330,8 @@ static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func) prev = &this->next; if (ret == -ENOENT) { + if (time_after(jiffies, timeout)) + break; /* warn about unknown tuples */ pr_warn_ratelimited("%s: queuing unknown" " CIS tuple 0x%02x (%u bytes)\n", diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h index 6301b81cf573..9bd717ff784b 100644 --- a/drivers/mmc/host/sdhci-pltfm.h +++ b/drivers/mmc/host/sdhci-pltfm.h @@ -111,8 +111,13 @@ static inline void *sdhci_pltfm_priv(struct sdhci_pltfm_host *host) return host->private; } +extern const struct dev_pm_ops sdhci_pltfm_pmops; +#ifdef CONFIG_PM_SLEEP int sdhci_pltfm_suspend(struct device *dev); int sdhci_pltfm_resume(struct device *dev); -extern const struct dev_pm_ops sdhci_pltfm_pmops; +#else +static inline int sdhci_pltfm_suspend(struct device *dev) { return 0; } +static inline int sdhci_pltfm_resume(struct device *dev) { return 0; } +#endif #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */ diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 36e2e41ed2aa..f4990ff32fa4 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o 
-obj-$(CONFIG_MHI_NET) += mhi_net.o +obj-$(CONFIG_MHI_NET) += mhi/ # # Networking Drivers diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index aa001b16765a..6908822d9773 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -73,6 +73,8 @@ enum ad_link_speed_type { AD_LINK_SPEED_50000MBPS, AD_LINK_SPEED_56000MBPS, AD_LINK_SPEED_100000MBPS, + AD_LINK_SPEED_200000MBPS, + AD_LINK_SPEED_400000MBPS, }; /* compare MAC addresses */ @@ -245,6 +247,8 @@ static inline int __check_agg_selection_timer(struct port *port) * %AD_LINK_SPEED_50000MBPS * %AD_LINK_SPEED_56000MBPS * %AD_LINK_SPEED_100000MBPS + * %AD_LINK_SPEED_200000MBPS + * %AD_LINK_SPEED_400000MBPS */ static u16 __get_link_speed(struct port *port) { @@ -312,13 +316,21 @@ static u16 __get_link_speed(struct port *port) speed = AD_LINK_SPEED_100000MBPS; break; + case SPEED_200000: + speed = AD_LINK_SPEED_200000MBPS; + break; + + case SPEED_400000: + speed = AD_LINK_SPEED_400000MBPS; + break; + default: /* unknown speed value from ethtool. 
shouldn't happen */ if (slave->speed != SPEED_UNKNOWN) - pr_warn_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", - slave->bond->dev->name, - slave->dev->name, slave->speed, - port->actor_port_number); + pr_err_once("%s: (slave %s): unknown ethtool speed (%d) for port %d (set it to 0)\n", + slave->bond->dev->name, + slave->dev->name, slave->speed, + port->actor_port_number); speed = 0; break; } @@ -733,6 +745,12 @@ static u32 __get_agg_bandwidth(struct aggregator *aggregator) case AD_LINK_SPEED_100000MBPS: bandwidth = nports * 100000; break; + case AD_LINK_SPEED_200000MBPS: + bandwidth = nports * 200000; + break; + case AD_LINK_SPEED_400000MBPS: + bandwidth = nports * 400000; + break; default: bandwidth = 0; /* to silence the compiler */ } diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 1bd5aea12b25..386468e66c41 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -697,9 +697,24 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port, { struct ocelot *ocelot = ds->priv; struct ocelot_port *ocelot_port = ocelot->ports[port]; + int err; + + ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA, + DEV_MAC_ENA_CFG); - ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG); ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0); + + err = ocelot_port_flush(ocelot, port); + if (err) + dev_err(ocelot->dev, "failed to flush port %d: %d\n", + port, err); + + /* Put the port in reset. 
*/ + ocelot_port_writel(ocelot_port, + DEV_CLOCK_CFG_MAC_TX_RST | + DEV_CLOCK_CFG_MAC_RX_RST | + DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000), + DEV_CLOCK_CFG); } static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c index 259f5e657c46..f025f968f96d 100644 --- a/drivers/net/dsa/xrs700x/xrs700x.c +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -7,11 +7,17 @@ #include <net/dsa.h> #include <linux/if_bridge.h> #include <linux/of_device.h> +#include <linux/netdev_features.h> +#include <linux/if_hsr.h> #include "xrs700x.h" #include "xrs700x_reg.h" #define XRS700X_MIB_INTERVAL msecs_to_jiffies(3000) +#define XRS7000X_SUPPORTED_HSR_FEATURES \ + (NETIF_F_HW_HSR_TAG_INS | NETIF_F_HW_HSR_TAG_RM | \ + NETIF_F_HW_HSR_FWD | NETIF_F_HW_HSR_DUP) + #define XRS7003E_ID 0x100 #define XRS7003F_ID 0x101 #define XRS7004E_ID 0x200 @@ -496,6 +502,119 @@ static void xrs700x_bridge_leave(struct dsa_switch *ds, int port, xrs700x_bridge_common(ds, port, bridge, false); } +static int xrs700x_hsr_join(struct dsa_switch *ds, int port, + struct net_device *hsr) +{ + unsigned int val = XRS_HSR_CFG_HSR_PRP; + struct dsa_port *partner = NULL, *dp; + struct xrs700x *priv = ds->priv; + struct net_device *slave; + int ret, i, hsr_pair[2]; + enum hsr_version ver; + + ret = hsr_get_version(hsr, &ver); + if (ret) + return ret; + + /* Only ports 1 and 2 can be HSR/PRP redundant ports. */ + if (port != 1 && port != 2) + return -EOPNOTSUPP; + + if (ver == HSR_V1) + val |= XRS_HSR_CFG_HSR; + else if (ver == PRP_V1) + val |= XRS_HSR_CFG_PRP; + else + return -EOPNOTSUPP; + + dsa_hsr_foreach_port(dp, ds, hsr) { + partner = dp; + } + + /* We can't enable redundancy on the switch until both + * redundant ports have signed up. 
+ */ + if (!partner) + return 0; + + regmap_fields_write(priv->ps_forward, partner->index, + XRS_PORT_DISABLED); + regmap_fields_write(priv->ps_forward, port, XRS_PORT_DISABLED); + + regmap_write(priv->regmap, XRS_HSR_CFG(partner->index), + val | XRS_HSR_CFG_LANID_A); + regmap_write(priv->regmap, XRS_HSR_CFG(port), + val | XRS_HSR_CFG_LANID_B); + + /* Clear bits for both redundant ports (HSR only) and the CPU port to + * enable forwarding. + */ + val = GENMASK(ds->num_ports - 1, 0); + if (ver == HSR_V1) { + val &= ~BIT(partner->index); + val &= ~BIT(port); + } + val &= ~BIT(dsa_upstream_port(ds, port)); + regmap_write(priv->regmap, XRS_PORT_FWD_MASK(partner->index), val); + regmap_write(priv->regmap, XRS_PORT_FWD_MASK(port), val); + + regmap_fields_write(priv->ps_forward, partner->index, + XRS_PORT_FORWARDING); + regmap_fields_write(priv->ps_forward, port, XRS_PORT_FORWARDING); + + hsr_pair[0] = port; + hsr_pair[1] = partner->index; + for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) { + slave = dsa_to_port(ds, hsr_pair[i])->slave; + slave->features |= XRS7000X_SUPPORTED_HSR_FEATURES; + } + + return 0; +} + +static int xrs700x_hsr_leave(struct dsa_switch *ds, int port, + struct net_device *hsr) +{ + struct dsa_port *partner = NULL, *dp; + struct xrs700x *priv = ds->priv; + struct net_device *slave; + int i, hsr_pair[2]; + unsigned int val; + + dsa_hsr_foreach_port(dp, ds, hsr) { + partner = dp; + } + + if (!partner) + return 0; + + regmap_fields_write(priv->ps_forward, partner->index, + XRS_PORT_DISABLED); + regmap_fields_write(priv->ps_forward, port, XRS_PORT_DISABLED); + + regmap_write(priv->regmap, XRS_HSR_CFG(partner->index), 0); + regmap_write(priv->regmap, XRS_HSR_CFG(port), 0); + + /* Clear bit for the CPU port to enable forwarding. 
*/ + val = GENMASK(ds->num_ports - 1, 0); + val &= ~BIT(dsa_upstream_port(ds, port)); + regmap_write(priv->regmap, XRS_PORT_FWD_MASK(partner->index), val); + regmap_write(priv->regmap, XRS_PORT_FWD_MASK(port), val); + + regmap_fields_write(priv->ps_forward, partner->index, + XRS_PORT_FORWARDING); + regmap_fields_write(priv->ps_forward, port, XRS_PORT_FORWARDING); + + hsr_pair[0] = port; + hsr_pair[1] = partner->index; + for (i = 0; i < ARRAY_SIZE(hsr_pair); i++) { + slave = dsa_to_port(ds, hsr_pair[i])->slave; + slave->features &= ~XRS7000X_SUPPORTED_HSR_FEATURES; + } + + return 0; +} + static const struct dsa_switch_ops xrs700x_ops = { .get_tag_protocol = xrs700x_get_tag_protocol, .setup = xrs700x_setup, @@ -509,6 +628,8 @@ static const struct dsa_switch_ops xrs700x_ops = { .get_stats64 = xrs700x_get_stats64, .port_bridge_join = xrs700x_bridge_join, .port_bridge_leave = xrs700x_bridge_leave, + .port_hsr_join = xrs700x_hsr_join, + .port_hsr_leave = xrs700x_hsr_leave, }; static int xrs700x_detect(struct xrs700x *priv) diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c index 16a46a78a037..489d9385b4f0 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_i2c.c +++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c @@ -121,7 +121,7 @@ static const struct i2c_device_id xrs700x_i2c_id[] = { MODULE_DEVICE_TABLE(i2c, xrs700x_i2c_id); -static const struct of_device_id xrs700x_i2c_dt_ids[] = { +static const struct of_device_id __maybe_unused xrs700x_i2c_dt_ids[] = { { .compatible = "arrow,xrs7003e", .data = &xrs7003e_info }, { .compatible = "arrow,xrs7003f", .data = &xrs7003f_info }, { .compatible = "arrow,xrs7004e", .data = &xrs7004e_info }, diff --git a/drivers/net/dsa/xrs700x/xrs700x_mdio.c b/drivers/net/dsa/xrs700x/xrs700x_mdio.c index a10ee28eb86e..44f58bee04a4 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_mdio.c +++ b/drivers/net/dsa/xrs700x/xrs700x_mdio.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/phy.h> #include <linux/if_vlan.h> 
+#include <linux/of.h> #include "xrs700x.h" #include "xrs700x_reg.h" @@ -138,7 +139,7 @@ static void xrs700x_mdio_remove(struct mdio_device *mdiodev) xrs700x_switch_remove(priv); } -static const struct of_device_id xrs700x_mdio_dt_ids[] = { +static const struct of_device_id __maybe_unused xrs700x_mdio_dt_ids[] = { { .compatible = "arrow,xrs7003e", .data = &xrs7003e_info }, { .compatible = "arrow,xrs7003f", .data = &xrs7003f_info }, { .compatible = "arrow,xrs7004e", .data = &xrs7004e_info }, @@ -150,7 +151,7 @@ MODULE_DEVICE_TABLE(of, xrs700x_mdio_dt_ids); static struct mdio_driver xrs700x_mdio_driver = { .mdiodrv.driver = { .name = "xrs700x-mdio", - .of_match_table = xrs700x_mdio_dt_ids, + .of_match_table = of_match_ptr(xrs700x_mdio_dt_ids), }, .probe = xrs700x_mdio_probe, .remove = xrs700x_mdio_remove, diff --git a/drivers/net/dsa/xrs700x/xrs700x_reg.h b/drivers/net/dsa/xrs700x/xrs700x_reg.h index a135d4d92b6d..470d00e07f15 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_reg.h +++ b/drivers/net/dsa/xrs700x/xrs700x_reg.h @@ -49,6 +49,11 @@ /* Port Configuration Registers - HSR/PRP */ #define XRS_HSR_CFG(x) (XRS_PORT_HSR_BASE(x) + 0x0) +#define XRS_HSR_CFG_HSR_PRP BIT(0) +#define XRS_HSR_CFG_HSR 0 +#define XRS_HSR_CFG_PRP BIT(8) +#define XRS_HSR_CFG_LANID_A 0 +#define XRS_HSR_CFG_LANID_B BIT(10) /* Port Configuration Registers - PTP */ #define XRS_PTP_RX_SYNC_DELAY_NS_LO(x) (XRS_PORT_PTP_BASE(x) + 0x2) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 1db6cfd2b55c..102f2c91fdb8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -404,6 +404,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) if (unlikely(!xdpf)) { trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = XDP_ABORTED; break; } @@ -424,7 +425,10 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, 
struct xdp_buff *xdp) xdp_stat = &rx_ring->rx_stats.xdp_redirect; break; } - fallthrough; + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = XDP_ABORTED; + break; case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 4bdf8fbe75a6..f8a168b73307 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -51,6 +51,14 @@ config B44_PCI depends on B44_PCI_AUTOSELECT && B44_PCICORE_AUTOSELECT default y +config BCM4908_ENET + tristate "Broadcom BCM4908 internal mac support" + depends on ARCH_BCM4908 || COMPILE_TEST + default y + help + This driver supports Ethernet controller integrated into Broadcom + BCM4908 family SoCs. + config BCM63XX_ENET tristate "Broadcom 63xx internal mac support" depends on BCM63XX diff --git a/drivers/net/ethernet/broadcom/Makefile b/drivers/net/ethernet/broadcom/Makefile index 7046ad6d3d0e..0ddfb5b5d53c 100644 --- a/drivers/net/ethernet/broadcom/Makefile +++ b/drivers/net/ethernet/broadcom/Makefile @@ -4,6 +4,7 @@ # obj-$(CONFIG_B44) += b44.o +obj-$(CONFIG_BCM4908_ENET) += bcm4908_enet.o obj-$(CONFIG_BCM63XX_ENET) += bcm63xx_enet.o obj-$(CONFIG_BCMGENET) += genet/ obj-$(CONFIG_BNX2) += bnx2.o diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c new file mode 100644 index 000000000000..0da8c8c73ba7 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Rafał Miłecki <rafal@milecki.pl> + */ + +#include <linux/delay.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/string.h> + +#include
"bcm4908_enet.h" +#include "unimac.h" + +#define ENET_DMA_CH_RX_CFG ENET_DMA_CH0_CFG +#define ENET_DMA_CH_TX_CFG ENET_DMA_CH1_CFG +#define ENET_DMA_CH_RX_STATE_RAM ENET_DMA_CH0_STATE_RAM +#define ENET_DMA_CH_TX_STATE_RAM ENET_DMA_CH1_STATE_RAM + +#define ENET_TX_BDS_NUM 200 +#define ENET_RX_BDS_NUM 200 +#define ENET_RX_BDS_NUM_MAX 8192 + +#define ENET_DMA_INT_DEFAULTS (ENET_DMA_CH_CFG_INT_DONE | \ + ENET_DMA_CH_CFG_INT_NO_DESC | \ + ENET_DMA_CH_CFG_INT_BUFF_DONE) +#define ENET_DMA_MAX_BURST_LEN 8 /* in 64 bit words */ + +#define ENET_MTU_MIN 60 +#define ENET_MTU_MAX 1500 /* Is it possible to support 2044? */ +#define ENET_MTU_MAX_EXTRA_SIZE 32 /* L2 */ + +struct bcm4908_enet_dma_ring_bd { + __le32 ctl; + __le32 addr; +} __packed; + +struct bcm4908_enet_dma_ring_slot { + struct sk_buff *skb; + unsigned int len; + dma_addr_t dma_addr; +}; + +struct bcm4908_enet_dma_ring { + int is_tx; + int read_idx; + int write_idx; + int length; + u16 cfg_block; + u16 st_ram_block; + + union { + void *cpu_addr; + struct bcm4908_enet_dma_ring_bd *buf_desc; + }; + dma_addr_t dma_addr; + + struct bcm4908_enet_dma_ring_slot *slots; +}; + +struct bcm4908_enet { + struct device *dev; + struct net_device *netdev; + struct napi_struct napi; + void __iomem *base; + + struct bcm4908_enet_dma_ring tx_ring; + struct bcm4908_enet_dma_ring rx_ring; +}; + +/*** + * R/W ops + */ + +static u32 enet_read(struct bcm4908_enet *enet, u16 offset) +{ + return readl(enet->base + offset); +} + +static void enet_write(struct bcm4908_enet *enet, u16 offset, u32 value) +{ + writel(value, enet->base + offset); +} + +static void enet_maskset(struct bcm4908_enet *enet, u16 offset, u32 mask, u32 set) +{ + u32 val; + + WARN_ON(set & ~mask); + + val = enet_read(enet, offset); + val = (val & ~mask) | (set & mask); + enet_write(enet, offset, val); +} + +static void enet_set(struct bcm4908_enet *enet, u16 offset, u32 set) +{ + enet_maskset(enet, offset, set, set); +} + +static u32 enet_umac_read(struct bcm4908_enet 
*enet, u16 offset) +{ + return enet_read(enet, ENET_UNIMAC + offset); +} + +static void enet_umac_write(struct bcm4908_enet *enet, u16 offset, u32 value) +{ + enet_write(enet, ENET_UNIMAC + offset, value); +} + +static void enet_umac_set(struct bcm4908_enet *enet, u16 offset, u32 set) +{ + enet_set(enet, ENET_UNIMAC + offset, set); +} + +/*** + * Helpers + */ + +static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet) +{ + enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); +} + +static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet) +{ + enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0); +} + +static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet) +{ + enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); +} + +/*** + * DMA + */ + +static int bcm4908_dma_alloc_buf_descs(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + int size = ring->length * sizeof(struct bcm4908_enet_dma_ring_bd); + struct device *dev = enet->dev; + + ring->cpu_addr = dma_alloc_coherent(dev, size, &ring->dma_addr, GFP_KERNEL); + if (!ring->cpu_addr) + return -ENOMEM; + + if (((uintptr_t)ring->cpu_addr) & (0x40 - 1)) { + dev_err(dev, "Invalid DMA ring alignment\n"); + goto err_free_buf_descs; + } + + ring->slots = kzalloc(ring->length * sizeof(*ring->slots), GFP_KERNEL); + if (!ring->slots) + goto err_free_buf_descs; + + ring->read_idx = 0; + ring->write_idx = 0; + + return 0; + +err_free_buf_descs: + dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr); + return -ENOMEM; +} + +static void bcm4908_enet_dma_free(struct bcm4908_enet *enet) +{ + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; + struct device *dev = enet->dev; + int size; + + size = rx_ring->length * sizeof(struct bcm4908_enet_dma_ring_bd); + if (rx_ring->cpu_addr) + dma_free_coherent(dev, size, rx_ring->cpu_addr, rx_ring->dma_addr); + 
kfree(rx_ring->slots); + + size = tx_ring->length * sizeof(struct bcm4908_enet_dma_ring_bd); + if (tx_ring->cpu_addr) + dma_free_coherent(dev, size, tx_ring->cpu_addr, tx_ring->dma_addr); + kfree(tx_ring->slots); +} + +static int bcm4908_enet_dma_alloc(struct bcm4908_enet *enet) +{ + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; + struct device *dev = enet->dev; + int err; + + tx_ring->length = ENET_TX_BDS_NUM; + tx_ring->is_tx = 1; + tx_ring->cfg_block = ENET_DMA_CH_TX_CFG; + tx_ring->st_ram_block = ENET_DMA_CH_TX_STATE_RAM; + err = bcm4908_dma_alloc_buf_descs(enet, tx_ring); + if (err) { + dev_err(dev, "Failed to alloc TX buf descriptors: %d\n", err); + return err; + } + + rx_ring->length = ENET_RX_BDS_NUM; + rx_ring->is_tx = 0; + rx_ring->cfg_block = ENET_DMA_CH_RX_CFG; + rx_ring->st_ram_block = ENET_DMA_CH_RX_STATE_RAM; + err = bcm4908_dma_alloc_buf_descs(enet, rx_ring); + if (err) { + dev_err(dev, "Failed to alloc RX buf descriptors: %d\n", err); + bcm4908_enet_dma_free(enet); + return err; + } + + return 0; +} + +static void bcm4908_enet_dma_reset(struct bcm4908_enet *enet) +{ + struct bcm4908_enet_dma_ring *rings[] = { &enet->rx_ring, &enet->tx_ring }; + int i; + + /* Disable the DMA controller and channel */ + for (i = 0; i < ARRAY_SIZE(rings); i++) + enet_write(enet, rings[i]->cfg_block + ENET_DMA_CH_CFG, 0); + enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN, 0); + + /* Reset channels state */ + for (i = 0; i < ARRAY_SIZE(rings); i++) { + struct bcm4908_enet_dma_ring *ring = rings[i]; + + enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR, 0); + enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_STATE_DATA, 0); + enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_DESC_LEN_STATUS, 0); + enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_DESC_BASE_BUFPTR, 0); + } +} + +static int bcm4908_enet_dma_alloc_rx_buf(struct 
bcm4908_enet *enet, unsigned int idx) +{ + struct bcm4908_enet_dma_ring_bd *buf_desc = &enet->rx_ring.buf_desc[idx]; + struct bcm4908_enet_dma_ring_slot *slot = &enet->rx_ring.slots[idx]; + struct device *dev = enet->dev; + u32 tmp; + int err; + + slot->len = ENET_MTU_MAX + ENET_MTU_MAX_EXTRA_SIZE; + + slot->skb = netdev_alloc_skb(enet->netdev, slot->len); + if (!slot->skb) + return -ENOMEM; + + slot->dma_addr = dma_map_single(dev, slot->skb->data, slot->len, DMA_FROM_DEVICE); + err = dma_mapping_error(dev, slot->dma_addr); + if (err) { + dev_err(dev, "Failed to map DMA buffer: %d\n", err); + kfree_skb(slot->skb); + slot->skb = NULL; + return err; + } + + tmp = slot->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT; + tmp |= DMA_CTL_STATUS_OWN; + if (idx == enet->rx_ring.length - 1) + tmp |= DMA_CTL_STATUS_WRAP; + buf_desc->ctl = cpu_to_le32(tmp); + buf_desc->addr = cpu_to_le32(slot->dma_addr); + + return 0; +} + +static void bcm4908_enet_dma_ring_init(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + int reset_channel = 0; /* We support only 1 main channel (with TX and RX) */ + int reset_subch = ring->is_tx ? 
1 : 0; + + /* Reset the DMA channel */ + enet_write(enet, ENET_DMA_CTRL_CHANNEL_RESET, BIT(reset_channel * 2 + reset_subch)); + enet_write(enet, ENET_DMA_CTRL_CHANNEL_RESET, 0); + + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, 0); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_MAX_BURST, ENET_DMA_MAX_BURST_LEN); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0); + + enet_write(enet, ring->st_ram_block + ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR, + (uint32_t)ring->dma_addr); +} + +static void bcm4908_enet_dma_uninit(struct bcm4908_enet *enet) +{ + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; + struct bcm4908_enet_dma_ring_slot *slot; + struct device *dev = enet->dev; + int i; + + for (i = rx_ring->length - 1; i >= 0; i--) { + slot = &rx_ring->slots[i]; + if (!slot->skb) + continue; + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_FROM_DEVICE); + kfree_skb(slot->skb); + slot->skb = NULL; + } +} + +static int bcm4908_enet_dma_init(struct bcm4908_enet *enet) +{ + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; + struct device *dev = enet->dev; + int err; + int i; + + for (i = 0; i < rx_ring->length; i++) { + err = bcm4908_enet_dma_alloc_rx_buf(enet, i); + if (err) { + dev_err(dev, "Failed to alloc RX buffer: %d\n", err); + bcm4908_enet_dma_uninit(enet); + return err; + } + } + + bcm4908_enet_dma_ring_init(enet, &enet->tx_ring); + bcm4908_enet_dma_ring_init(enet, &enet->rx_ring); + + return 0; +} + +static void bcm4908_enet_dma_tx_ring_enable(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE); +} + +static void bcm4908_enet_dma_tx_ring_disable(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG, 0); +} + +static void bcm4908_enet_dma_rx_ring_enable(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + enet_set(enet, ring->cfg_block + 
ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE); +} + +static void bcm4908_enet_dma_rx_ring_disable(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) +{ + unsigned long deadline; + u32 tmp; + + enet_maskset(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE, 0); + + deadline = jiffies + usecs_to_jiffies(2000); + do { + tmp = enet_read(enet, ring->cfg_block + ENET_DMA_CH_CFG); + if (!(tmp & ENET_DMA_CH_CFG_ENABLE)) + return; + enet_maskset(enet, ring->cfg_block + ENET_DMA_CH_CFG, ENET_DMA_CH_CFG_ENABLE, 0); + usleep_range(10, 30); + } while (!time_after_eq(jiffies, deadline)); + + dev_warn(enet->dev, "Timeout waiting for DMA TX stop\n"); +} + +/*** + * Ethernet driver + */ + +static void bcm4908_enet_gmac_init(struct bcm4908_enet *enet) +{ + u32 cmd; + + cmd = enet_umac_read(enet, UMAC_CMD); + enet_umac_write(enet, UMAC_CMD, cmd | CMD_SW_RESET); + enet_umac_write(enet, UMAC_CMD, cmd & ~CMD_SW_RESET); + + enet_set(enet, ENET_FLUSH, ENET_FLUSH_RXFIFO_FLUSH | ENET_FLUSH_TXFIFO_FLUSH); + enet_maskset(enet, ENET_FLUSH, ENET_FLUSH_RXFIFO_FLUSH | ENET_FLUSH_TXFIFO_FLUSH, 0); + + enet_set(enet, ENET_MIB_CTRL, ENET_MIB_CTRL_CLR_MIB); + enet_maskset(enet, ENET_MIB_CTRL, ENET_MIB_CTRL_CLR_MIB, 0); + + cmd = enet_umac_read(enet, UMAC_CMD); + cmd &= ~(CMD_SPEED_MASK << CMD_SPEED_SHIFT); + cmd &= ~CMD_TX_EN; + cmd &= ~CMD_RX_EN; + cmd |= CMD_SPEED_1000 << CMD_SPEED_SHIFT; + enet_umac_write(enet, UMAC_CMD, cmd); + + enet_maskset(enet, ENET_GMAC_STATUS, + ENET_GMAC_STATUS_ETH_SPEED_MASK | + ENET_GMAC_STATUS_HD | + ENET_GMAC_STATUS_AUTO_CFG_EN | + ENET_GMAC_STATUS_LINK_UP, + ENET_GMAC_STATUS_ETH_SPEED_1000 | + ENET_GMAC_STATUS_AUTO_CFG_EN | + ENET_GMAC_STATUS_LINK_UP); +} + +static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id) +{ + struct bcm4908_enet *enet = dev_id; + + bcm4908_enet_intrs_off(enet); + bcm4908_enet_intrs_ack(enet); + + napi_schedule(&enet->napi); + + return IRQ_HANDLED; +} + +static int bcm4908_enet_open(struct net_device *netdev) 
+{ + struct bcm4908_enet *enet = netdev_priv(netdev); + struct device *dev = enet->dev; + int err; + + err = request_irq(netdev->irq, bcm4908_enet_irq_handler, 0, "enet", enet); + if (err) { + dev_err(dev, "Failed to request IRQ %d: %d\n", netdev->irq, err); + return err; + } + + bcm4908_enet_gmac_init(enet); + bcm4908_enet_dma_reset(enet); + bcm4908_enet_dma_init(enet); + + enet_umac_set(enet, UMAC_CMD, CMD_TX_EN | CMD_RX_EN); + + enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN); + enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0); + bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring); + + napi_enable(&enet->napi); + netif_carrier_on(netdev); + netif_start_queue(netdev); + + bcm4908_enet_intrs_ack(enet); + bcm4908_enet_intrs_on(enet); + + return 0; +} + +static int bcm4908_enet_stop(struct net_device *netdev) +{ + struct bcm4908_enet *enet = netdev_priv(netdev); + + netif_stop_queue(netdev); + netif_carrier_off(netdev); + napi_disable(&enet->napi); + + bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring); + bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring); + + bcm4908_enet_dma_uninit(enet); + + free_irq(enet->netdev->irq, enet); + + return 0; +} + +static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring_slot *slot; + struct device *dev = enet->dev; + struct bcm4908_enet_dma_ring_bd *buf_desc; + int free_buf_descs; + u32 tmp; + + /* Free transmitted skbs */ + while (ring->read_idx != ring->write_idx) { + buf_desc = &ring->buf_desc[ring->read_idx]; + if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) + break; + slot = &ring->slots[ring->read_idx]; + + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); + if (++ring->read_idx == ring->length) + ring->read_idx = 0; + } + + /* Don't use the last empty buf 
descriptor */ + if (ring->read_idx <= ring->write_idx) + free_buf_descs = ring->read_idx - ring->write_idx + ring->length; + else + free_buf_descs = ring->read_idx - ring->write_idx; + if (free_buf_descs < 2) + return NETDEV_TX_BUSY; + + /* Hardware removes OWN bit after sending data */ + buf_desc = &ring->buf_desc[ring->write_idx]; + if (unlikely(le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)) { + netif_stop_queue(netdev); + return NETDEV_TX_BUSY; + } + + slot = &ring->slots[ring->write_idx]; + slot->skb = skb; + slot->len = skb->len; + slot->dma_addr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, slot->dma_addr))) + return NETDEV_TX_BUSY; + + tmp = skb->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT; + tmp |= DMA_CTL_STATUS_OWN; + tmp |= DMA_CTL_STATUS_SOP; + tmp |= DMA_CTL_STATUS_EOP; + tmp |= DMA_CTL_STATUS_APPEND_CRC; + if (ring->write_idx + 1 == ring->length - 1) + tmp |= DMA_CTL_STATUS_WRAP; + + buf_desc->addr = cpu_to_le32((uint32_t)slot->dma_addr); + buf_desc->ctl = cpu_to_le32(tmp); + + bcm4908_enet_dma_tx_ring_enable(enet, &enet->tx_ring); + + if (++ring->write_idx == ring->length - 1) + ring->write_idx = 0; + enet->netdev->stats.tx_bytes += skb->len; + enet->netdev->stats.tx_packets++; + + return NETDEV_TX_OK; +} + +static int bcm4908_enet_poll(struct napi_struct *napi, int weight) +{ + struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi); + struct device *dev = enet->dev; + int handled = 0; + + while (handled < weight) { + struct bcm4908_enet_dma_ring_bd *buf_desc; + struct bcm4908_enet_dma_ring_slot slot; + u32 ctl; + int len; + int err; + + buf_desc = &enet->rx_ring.buf_desc[enet->rx_ring.read_idx]; + ctl = le32_to_cpu(buf_desc->ctl); + if (ctl & DMA_CTL_STATUS_OWN) + break; + + slot = enet->rx_ring.slots[enet->rx_ring.read_idx]; + + /* Provide new buffer before unpinning the old one */ + err = bcm4908_enet_dma_alloc_rx_buf(enet, enet->rx_ring.read_idx); + if (err) + break; + + if 
(++enet->rx_ring.read_idx == enet->rx_ring.length) + enet->rx_ring.read_idx = 0; + + len = (ctl & DMA_CTL_LEN_DESC_BUFLENGTH) >> DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT; + + if (len < ENET_MTU_MIN || + (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) { + enet->netdev->stats.rx_dropped++; + break; + } + + dma_unmap_single(dev, slot.dma_addr, slot.len, DMA_FROM_DEVICE); + + skb_put(slot.skb, len - ETH_FCS_LEN); + slot.skb->protocol = eth_type_trans(slot.skb, enet->netdev); + netif_receive_skb(slot.skb); + + enet->netdev->stats.rx_packets++; + enet->netdev->stats.rx_bytes += len; + } + + if (handled < weight) { + napi_complete_done(napi, handled); + bcm4908_enet_intrs_on(enet); + } + + return handled; +} + +static const struct net_device_ops bcm4908_enet_netdev_ops = { + .ndo_open = bcm4908_enet_open, + .ndo_stop = bcm4908_enet_stop, + .ndo_start_xmit = bcm4908_enet_start_xmit, + .ndo_set_mac_address = eth_mac_addr, +}; + +static int bcm4908_enet_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct net_device *netdev; + struct bcm4908_enet *enet; + int err; + + netdev = devm_alloc_etherdev(dev, sizeof(*enet)); + if (!netdev) + return -ENOMEM; + + enet = netdev_priv(netdev); + enet->dev = dev; + enet->netdev = netdev; + + enet->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(enet->base)) { + dev_err(dev, "Failed to map registers: %ld\n", PTR_ERR(enet->base)); + return PTR_ERR(enet->base); + } + + netdev->irq = platform_get_irq_byname(pdev, "rx"); + if (netdev->irq < 0) + return netdev->irq; + + dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + + err = bcm4908_enet_dma_alloc(enet); + if (err) + return err; + + SET_NETDEV_DEV(netdev, &pdev->dev); + eth_hw_addr_random(netdev); + netdev->netdev_ops = &bcm4908_enet_netdev_ops; + netdev->min_mtu = ETH_ZLEN; + netdev->mtu = ENET_MTU_MAX; + netdev->max_mtu = ENET_MTU_MAX; + netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64); + + err = 
register_netdev(netdev); + if (err) { + bcm4908_enet_dma_free(enet); + return err; + } + + platform_set_drvdata(pdev, enet); + + return 0; +} + +static int bcm4908_enet_remove(struct platform_device *pdev) +{ + struct bcm4908_enet *enet = platform_get_drvdata(pdev); + + unregister_netdev(enet->netdev); + netif_napi_del(&enet->napi); + bcm4908_enet_dma_free(enet); + + return 0; +} + +static const struct of_device_id bcm4908_enet_of_match[] = { + { .compatible = "brcm,bcm4908-enet"}, + {}, +}; + +static struct platform_driver bcm4908_enet_driver = { + .driver = { + .name = "bcm4908_enet", + .of_match_table = bcm4908_enet_of_match, + }, + .probe = bcm4908_enet_probe, + .remove = bcm4908_enet_remove, +}; +module_platform_driver(bcm4908_enet_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(of, bcm4908_enet_of_match); diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.h b/drivers/net/ethernet/broadcom/bcm4908_enet.h new file mode 100644 index 000000000000..8a3ede2da537 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __BCM4908_ENET_H +#define __BCM4908_ENET_H + +#define ENET_CONTROL 0x000 +#define ENET_MIB_CTRL 0x004 +#define ENET_MIB_CTRL_CLR_MIB 0x00000001 +#define ENET_RX_ERR_MASK 0x008 +#define ENET_MIB_MAX_PKT_SIZE 0x00C +#define ENET_MIB_MAX_PKT_SIZE_VAL 0x00003fff +#define ENET_DIAG_OUT 0x01c +#define ENET_ENABLE_DROP_PKT 0x020 +#define ENET_IRQ_ENABLE 0x024 +#define ENET_IRQ_ENABLE_OVFL 0x00000001 +#define ENET_GMAC_STATUS 0x028 +#define ENET_GMAC_STATUS_ETH_SPEED_MASK 0x00000003 +#define ENET_GMAC_STATUS_ETH_SPEED_10 0x00000000 +#define ENET_GMAC_STATUS_ETH_SPEED_100 0x00000001 +#define ENET_GMAC_STATUS_ETH_SPEED_1000 0x00000002 +#define ENET_GMAC_STATUS_HD 0x00000004 +#define ENET_GMAC_STATUS_AUTO_CFG_EN 0x00000008 +#define ENET_GMAC_STATUS_LINK_UP 0x00000010 +#define ENET_IRQ_STATUS 0x02c +#define ENET_IRQ_STATUS_OVFL 0x00000001 +#define ENET_OVERFLOW_COUNTER 
0x030 +#define ENET_FLUSH 0x034 +#define ENET_FLUSH_RXFIFO_FLUSH 0x00000001 +#define ENET_FLUSH_TXFIFO_FLUSH 0x00000002 +#define ENET_RSV_SELECT 0x038 +#define ENET_BP_FORCE 0x03c +#define ENET_BP_FORCE_FORCE 0x00000001 +#define ENET_DMA_RX_OK_TO_SEND_COUNT 0x040 +#define ENET_DMA_RX_OK_TO_SEND_COUNT_VAL 0x0000000f +#define ENET_TX_CRC_CTRL 0x044 +#define ENET_MIB 0x200 +#define ENET_UNIMAC 0x400 +#define ENET_DMA 0x800 +#define ENET_DMA_CONTROLLER_CFG 0x800 +#define ENET_DMA_CTRL_CFG_MASTER_EN 0x00000001 +#define ENET_DMA_CTRL_CFG_FLOWC_CH1_EN 0x00000002 +#define ENET_DMA_CTRL_CFG_FLOWC_CH3_EN 0x00000004 +#define ENET_DMA_FLOWCTL_CH1_THRESH_LO 0x804 +#define ENET_DMA_FLOWCTL_CH1_THRESH_HI 0x808 +#define ENET_DMA_FLOWCTL_CH1_ALLOC 0x80c +#define ENET_DMA_FLOWCTL_CH1_ALLOC_FORCE 0x80000000 +#define ENET_DMA_FLOWCTL_CH3_THRESH_LO 0x810 +#define ENET_DMA_FLOWCTL_CH3_THRESH_HI 0x814 +#define ENET_DMA_FLOWCTL_CH3_ALLOC 0x818 +#define ENET_DMA_FLOWCTL_CH5_THRESH_LO 0x81C +#define ENET_DMA_FLOWCTL_CH5_THRESH_HI 0x820 +#define ENET_DMA_FLOWCTL_CH5_ALLOC 0x824 +#define ENET_DMA_FLOWCTL_CH7_THRESH_LO 0x828 +#define ENET_DMA_FLOWCTL_CH7_THRESH_HI 0x82C +#define ENET_DMA_FLOWCTL_CH7_ALLOC 0x830 +#define ENET_DMA_CTRL_CHANNEL_RESET 0x834 +#define ENET_DMA_CTRL_CHANNEL_DEBUG 0x838 +#define ENET_DMA_CTRL_GLOBAL_INTERRUPT_STATUS 0x840 +#define ENET_DMA_CTRL_GLOBAL_INTERRUPT_MASK 0x844 +#define ENET_DMA_CH0_CFG 0xa00 /* RX */ +#define ENET_DMA_CH1_CFG 0xa10 /* TX */ +#define ENET_DMA_CH0_STATE_RAM 0xc00 /* RX */ +#define ENET_DMA_CH1_STATE_RAM 0xc10 /* TX */ + +#define ENET_DMA_CH_CFG 0x00 /* assorted configuration */ +#define ENET_DMA_CH_CFG_ENABLE 0x00000001 /* set to enable channel */ +#define ENET_DMA_CH_CFG_PKT_HALT 0x00000002 /* idle after an EOP flag is detected */ +#define ENET_DMA_CH_CFG_BURST_HALT 0x00000004 /* idle after finish current memory burst */ +#define ENET_DMA_CH_CFG_INT_STAT 0x04 /* interrupts control and status */ +#define ENET_DMA_CH_CFG_INT_MASK 0x08 /* 
interrupts mask */ +#define ENET_DMA_CH_CFG_INT_BUFF_DONE 0x00000001 /* buffer done */ +#define ENET_DMA_CH_CFG_INT_DONE 0x00000002 /* packet xfer complete */ +#define ENET_DMA_CH_CFG_INT_NO_DESC 0x00000004 /* no valid descriptors */ +#define ENET_DMA_CH_CFG_INT_RX_ERROR 0x00000008 /* rxdma detect client protocol error */ +#define ENET_DMA_CH_CFG_MAX_BURST 0x0c /* max burst length permitted */ +#define ENET_DMA_CH_CFG_MAX_BURST_DESCSIZE_SEL 0x00040000 /* DMA Descriptor Size Selection */ +#define ENET_DMA_CH_CFG_SIZE 0x10 + +#define ENET_DMA_CH_STATE_RAM_BASE_DESC_PTR 0x00 /* descriptor ring start address */ +#define ENET_DMA_CH_STATE_RAM_STATE_DATA 0x04 /* state/bytes done/ring offset */ +#define ENET_DMA_CH_STATE_RAM_DESC_LEN_STATUS 0x08 /* buffer descriptor status and len */ +#define ENET_DMA_CH_STATE_RAM_DESC_BASE_BUFPTR 0x0c /* buffer descrpitor current processing */ +#define ENET_DMA_CH_STATE_RAM_SIZE 0x10 + +#define DMA_CTL_STATUS_APPEND_CRC 0x00000100 +#define DMA_CTL_STATUS_APPEND_BRCM_TAG 0x00000200 +#define DMA_CTL_STATUS_PRIO 0x00000C00 /* Prio for Tx */ +#define DMA_CTL_STATUS_WRAP 0x00001000 /* */ +#define DMA_CTL_STATUS_SOP 0x00002000 /* first buffer in packet */ +#define DMA_CTL_STATUS_EOP 0x00004000 /* last buffer in packet */ +#define DMA_CTL_STATUS_OWN 0x00008000 /* cleared by DMA, set by SW */ +#define DMA_CTL_LEN_DESC_BUFLENGTH 0x0fff0000 +#define DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT 16 +#define DMA_CTL_LEN_DESC_MULTICAST 0x40000000 +#define DMA_CTL_LEN_DESC_USEFPM 0x80000000 + +#endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index 876f90e5795e..d5218e74284c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -220,9 +220,6 @@ struct cudbg_mps_tcam { u8 reserved[2]; }; -#define CUDBG_VPD_PF_SIZE 0x800 -#define CUDBG_SCFG_VER_ADDR 0x06 -#define CUDBG_SCFG_VER_LEN 4 #define CUDBG_VPD_VER_ADDR 0x18c7 #define 
CUDBG_VPD_VER_LEN 2 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 75474f810249..6c85a10f465c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -2686,10 +2686,10 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, struct adapter *padap = pdbg_init->adap; struct cudbg_buffer temp_buff = { 0 }; char vpd_str[CUDBG_VPD_VER_LEN + 1]; - u32 scfg_vers, vpd_vers, fw_vers; struct cudbg_vpd_data *vpd_data; struct vpd_params vpd = { 0 }; - int rc, ret; + u32 vpd_vers, fw_vers; + int rc; rc = t4_get_raw_vpd_params(padap, &vpd); if (rc) @@ -2699,24 +2699,6 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, if (rc) return rc; - /* Serial Configuration Version is located beyond the PF's vpd size. - * Temporarily give access to entire EEPROM to get it. - */ - rc = pci_set_vpd_size(padap->pdev, EEPROMVSIZE); - if (rc < 0) - return rc; - - ret = cudbg_read_vpd_reg(padap, CUDBG_SCFG_VER_ADDR, CUDBG_SCFG_VER_LEN, - &scfg_vers); - - /* Restore back to original PF's vpd size */ - rc = pci_set_vpd_size(padap->pdev, CUDBG_VPD_PF_SIZE); - if (rc < 0) - return rc; - - if (ret) - return ret; - rc = cudbg_read_vpd_reg(padap, CUDBG_VPD_VER_ADDR, CUDBG_VPD_VER_LEN, vpd_str); if (rc) @@ -2737,7 +2719,7 @@ int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, memcpy(vpd_data->bn, vpd.pn, PN_LEN + 1); memcpy(vpd_data->na, vpd.na, MACADDR_LEN + 1); memcpy(vpd_data->mn, vpd.id, ID_LEN + 1); - vpd_data->scfg_vers = scfg_vers; + vpd_data->scfg_vers = t4_read_reg(padap, PCIE_STATIC_SPARE2_A); vpd_data->vpd_vers = vpd_vers; vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(fw_vers); vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 8e681ce72d62..314f8d806723 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ 
b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -414,7 +414,6 @@ struct pf_resources { }; struct pci_params { - unsigned int vpd_cap_addr; unsigned char speed; unsigned char width; }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 9f1965c80fb1..6264bc66a4fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3201,8 +3201,6 @@ static void cxgb4_mgmt_fill_vf_station_mac_addr(struct adapter *adap) int err; u8 *na; - adap->params.pci.vpd_cap_addr = pci_find_capability(adap->pdev, - PCI_CAP_ID_VPD); err = t4_get_raw_vpd_params(adap, &adap->params.vpd); if (err) return; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 0c5373462ced..0b1b5f9c67d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -219,6 +219,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */ CH_PCI_ID_TABLE_FENTRY(0x608a), /* Custom T62100-CR */ CH_PCI_ID_TABLE_FENTRY(0x608b), /* Custom T6225-CR */ + CH_PCI_ID_TABLE_FENTRY(0x6092), /* Custom T62100-CR-LOM */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index b11a172b5174..695916ba0405 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -884,6 +884,12 @@ #define TDUE_V(x) ((x) << TDUE_S) #define TDUE_F TDUE_V(1U) +/* SPARE2 register contains 32-bit value at offset 0x6 in Serial INIT + * Configuration flashed on EEPROM. This value corresponds to 32-bit + * Serial Configuration Version information. 
+ */ +#define PCIE_STATIC_SPARE2_A 0x5bfc + /* registers for module MC */ #define MC_INT_CAUSE_A 0x7518 #define MC_P_INT_CAUSE_A 0x41318 diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index d8e568f6caf3..ccfe52a50a66 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2180,8 +2180,10 @@ static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv, struct xdp_frame **init_xdpf) { struct xdp_frame *new_xdpf, *xdpf = *init_xdpf; - void *new_buff; + void *new_buff, *aligned_data; struct page *p; + u32 data_shift; + int headroom; /* Check the data alignment and make sure the headroom is large * enough to store the xdpf backpointer. Use an aligned headroom @@ -2191,25 +2193,57 @@ static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv, * byte frame headroom. If the XDP program uses all of it, copy the * data to a new buffer and make room for storing the backpointer. */ - if (PTR_IS_ALIGNED(xdpf->data, DPAA_A050385_ALIGN) && + if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) && xdpf->headroom >= priv->tx_headroom) { xdpf->headroom = priv->tx_headroom; return 0; } + /* Try to move the data inside the buffer just enough to align it and + * store the xdpf backpointer. If the available headroom isn't large + * enough, resort to allocating a new buffer and copying the data. + */ + aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT); + data_shift = xdpf->data - aligned_data; + + /* The XDP frame's headroom needs to be large enough to accommodate + * shifting the data as well as storing the xdpf backpointer. + */ + if (xdpf->headroom >= data_shift + priv->tx_headroom) { + memmove(aligned_data, xdpf->data, xdpf->len); + xdpf->data = aligned_data; + xdpf->headroom = priv->tx_headroom; + return 0; + } + + /* The new xdp_frame is stored in the new buffer. 
Reserve enough space + * in the headroom for storing it along with the driver's private + * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to + * guarantee the data's alignment in the buffer. + */ + headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom, + DPAA_FD_DATA_ALIGNMENT); + + /* Assure the extended headroom and data don't overflow the buffer, + * while maintaining the mandatory tailroom. + */ + if (headroom + xdpf->len > DPAA_BP_RAW_SIZE - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + return -ENOMEM; + p = dev_alloc_pages(0); if (unlikely(!p)) return -ENOMEM; /* Copy the data to the new buffer at a properly aligned offset */ new_buff = page_address(p); - memcpy(new_buff + priv->tx_headroom, xdpf->data, xdpf->len); + memcpy(new_buff + headroom, xdpf->data, xdpf->len); /* Create an XDP frame around the new buffer in a similar fashion * to xdp_convert_buff_to_frame. */ new_xdpf = new_buff; - new_xdpf->data = new_buff + priv->tx_headroom; + new_xdpf->data = new_buff + headroom; new_xdpf->len = xdpf->len; new_xdpf->headroom = priv->tx_headroom; new_xdpf->frame_sz = DPAA_BP_RAW_SIZE; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index e1e950d48c92..c71fe8d751d5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -196,6 +196,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_CBS_BW_MASK GENMASK(6, 0) #define ENETC_PTCCBSR1(n) (0x1114 + (n) * 8) /* n = 0 to 7*/ #define ENETC_RSSHASH_KEY_SIZE 40 +#define ENETC_PRSSCAPR 0x1404 +#define ENETC_PRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) #define ENETC_PRSSK(n) (0x1410 + (n) * 4) /* n = [0..9] */ #define ENETC_PSIVLANFMR 0x1700 #define ENETC_PSIVLANFMR_VS BIT(0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index ed8fcb8b486e..3eb5f1375bd4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c 
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -996,6 +996,51 @@ static void enetc_phylink_destroy(struct enetc_ndev_priv *priv) phylink_destroy(priv->phylink); } +/* Initialize the entire shared memory for the flow steering entries + * of this port (PF + VFs) + */ +static int enetc_init_port_rfs_memory(struct enetc_si *si) +{ + struct enetc_cmd_rfse rfse = {0}; + struct enetc_hw *hw = &si->hw; + int num_rfs, i, err = 0; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRFSCAPR); + num_rfs = ENETC_PRFSCAPR_GET_NUM_RFS(val); + + for (i = 0; i < num_rfs; i++) { + err = enetc_set_fs_entry(si, &rfse, i); + if (err) + break; + } + + return err; +} + +static int enetc_init_port_rss_memory(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + int num_rss, err; + int *rss_table; + u32 val; + + val = enetc_port_rd(hw, ENETC_PRSSCAPR); + num_rss = ENETC_PRSSCAPR_GET_NUM_RSS(val); + if (!num_rss) + return 0; + + rss_table = kcalloc(num_rss, sizeof(*rss_table), GFP_KERNEL); + if (!rss_table) + return -ENOMEM; + + err = enetc_set_rss_table(si, rss_table, num_rss); + + kfree(rss_table); + + return err; +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1051,6 +1096,18 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } + err = enetc_init_port_rfs_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); + goto err_init_port_rfs; + } + + err = enetc_init_port_rss_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); + goto err_init_port_rss; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -1079,6 +1136,8 @@ err_phylink_create: enetc_mdiobus_destroy(pf); err_mdiobus_create: enetc_free_msix(priv); +err_init_port_rss: +err_init_port_rfs: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h 
index e20a1b3267b9..e9e60a935f40 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -272,7 +272,7 @@ struct hnae3_ring_chain_node { }; #define HNAE3_IS_TX_RING(node) \ - (((node)->flag & (1 << HNAE3_RING_TYPE_B)) == HNAE3_RING_TYPE_TX) + (((node)->flag & 1 << HNAE3_RING_TYPE_B) == HNAE3_RING_TYPE_TX) /* device specification info from firmware */ struct hnae3_dev_specs { @@ -292,7 +292,6 @@ struct hnae3_client_ops { int (*init_instance)(struct hnae3_handle *handle); void (*uninit_instance)(struct hnae3_handle *handle, bool reset); void (*link_status_change)(struct hnae3_handle *handle, bool state); - int (*setup_tc)(struct hnae3_handle *handle, u8 tc); int (*reset_notify)(struct hnae3_handle *handle, enum hnae3_reset_notify_type type); void (*process_hw_error)(struct hnae3_handle *handle, @@ -776,9 +775,9 @@ struct hnae3_handle { #define hnae3_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift)) #define hnae3_set_bit(origin, shift, val) \ - hnae3_set_field((origin), (0x1 << (shift)), (shift), (val)) + hnae3_set_field(origin, 0x1 << (shift), shift, val) #define hnae3_get_bit(origin, shift) \ - hnae3_get_field((origin), (0x1 << (shift)), (shift)) + hnae3_get_field(origin, 0x1 << (shift), shift) #define HNAE3_DBG_TM_NODES "tm_nodes" #define HNAE3_DBG_TM_PRI "tm_priority" diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 36c7813b5996..dd11c57027bb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -162,7 +162,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h) continue; dev_info(&h->pdev->dev, - " %4d %4d %4d\n", + " %4d %4u %4d\n", i, global_qid, priv->ring[i].tqp_vector->vector_irq); } @@ -423,6 +423,30 @@ static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, return (*ppos = len); } +static int hns3_dbg_check_cmd(struct 
hnae3_handle *handle, char *cmd_buf) +{ + int ret = 0; + + if (strncmp(cmd_buf, "help", 4) == 0) + hns3_dbg_help(handle); + else if (strncmp(cmd_buf, "queue info", 10) == 0) + ret = hns3_dbg_queue_info(handle, cmd_buf); + else if (strncmp(cmd_buf, "queue map", 9) == 0) + ret = hns3_dbg_queue_map(handle); + else if (strncmp(cmd_buf, "bd info", 7) == 0) + ret = hns3_dbg_bd_info(handle, cmd_buf); + else if (strncmp(cmd_buf, "dev capability", 14) == 0) + hns3_dbg_dev_caps(handle); + else if (strncmp(cmd_buf, "dev spec", 8) == 0) + hns3_dbg_dev_specs(handle); + else if (handle->ae_algo->ops->dbg_run_cmd) + ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf); + else + ret = -EOPNOTSUPP; + + return ret; +} + static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { @@ -430,7 +454,7 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer, struct hns3_nic_priv *priv = handle->priv; char *cmd_buf, *cmd_buf_tmp; int uncopied_bytes; - int ret = 0; + int ret; if (*ppos != 0) return 0; @@ -461,23 +485,7 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer, count = cmd_buf_tmp - cmd_buf + 1; } - if (strncmp(cmd_buf, "help", 4) == 0) - hns3_dbg_help(handle); - else if (strncmp(cmd_buf, "queue info", 10) == 0) - ret = hns3_dbg_queue_info(handle, cmd_buf); - else if (strncmp(cmd_buf, "queue map", 9) == 0) - ret = hns3_dbg_queue_map(handle); - else if (strncmp(cmd_buf, "bd info", 7) == 0) - ret = hns3_dbg_bd_info(handle, cmd_buf); - else if (strncmp(cmd_buf, "dev capability", 14) == 0) - hns3_dbg_dev_caps(handle); - else if (strncmp(cmd_buf, "dev spec", 8) == 0) - hns3_dbg_dev_specs(handle); - else if (handle->ae_algo->ops->dbg_run_cmd) - ret = handle->ae_algo->ops->dbg_run_cmd(handle, cmd_buf); - else - ret = -EOPNOTSUPP; - + ret = hns3_dbg_check_cmd(handle, cmd_buf); if (ret) hns3_dbg_help(handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c 
b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cf16d5f31f26..bf4302a5cf95 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -32,7 +32,7 @@ #define CREATE_TRACE_POINTS #include "hns3_trace.h" -#define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) +#define hns3_set_field(origin, shift, val) ((origin) |= (val) << (shift)) #define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) #define hns3_rl_err(fmt, ...) \ @@ -2329,7 +2329,7 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); pci_ers_result_t ret; - dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); + dev_info(&pdev->dev, "PCI error detected, state(=%u)!!\n", state); if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; @@ -4084,7 +4084,7 @@ out_when_alloc_ring_memory: return -ENOMEM; } -int hns3_uninit_all_ring(struct hns3_nic_priv *priv) +static void hns3_uninit_all_ring(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; int i; @@ -4093,7 +4093,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv) hns3_fini_ring(&priv->ring[i]); hns3_fini_ring(&priv->ring[i + h->kinfo.num_tqps]); } - return 0; } /* Set mac addr if it is configured. 
or leave it to the AE driver */ @@ -4321,7 +4320,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) { struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); - int ret; if (netdev->reg_state != NETREG_UNINITIALIZED) unregister_netdev(netdev); @@ -4347,9 +4345,7 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_nic_dealloc_vector_data(priv); - ret = hns3_uninit_all_ring(priv); - if (ret) - netdev_err(netdev, "uninit ring error\n"); + hns3_uninit_all_ring(priv); hns3_put_ring_config(priv); @@ -4378,20 +4374,6 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup) } } -static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) -{ - struct hnae3_knic_private_info *kinfo = &handle->kinfo; - struct net_device *ndev = kinfo->netdev; - - if (tc > HNAE3_MAX_TC) - return -EINVAL; - - if (!ndev) - return -ENODEV; - - return hns3_nic_set_real_num_queue(ndev); -} - static void hns3_clear_tx_ring(struct hns3_enet_ring *ring) { while (ring->next_to_clean != ring->next_to_use) { @@ -4658,7 +4640,6 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) { struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); - int ret; if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); @@ -4676,13 +4657,11 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) hns3_nic_dealloc_vector_data(priv); - ret = hns3_uninit_all_ring(priv); - if (ret) - netdev_err(netdev, "uninit ring error\n"); + hns3_uninit_all_ring(priv); hns3_put_ring_config(priv); - return ret; + return 0; } static int hns3_reset_notify(struct hnae3_handle *handle, @@ -4828,7 +4807,6 @@ static const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, - 
.setup_tc = hns3_client_setup_tc, .reset_notify = hns3_reset_notify, .process_hw_error = hns3_process_hw_error, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index d70af1d0d554..d069b04ee587 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -554,7 +554,7 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) } #define hns3_read_dev(a, reg) \ - hns3_read_reg((a)->io_base, (reg)) + hns3_read_reg((a)->io_base, reg) static inline bool hns3_nic_resetting(struct net_device *netdev) { @@ -564,7 +564,7 @@ static inline bool hns3_nic_resetting(struct net_device *netdev) } #define hns3_write_dev(a, reg, value) \ - hns3_write_reg((a)->io_base, (reg), (value)) + hns3_write_reg((a)->io_base, reg, value) #define ring_to_dev(ring) ((ring)->dev) @@ -588,15 +588,15 @@ static inline unsigned int hns3_page_order(struct hns3_enet_ring *ring) /* iterator for handling rings in ring group */ #define hns3_for_each_ring(pos, head) \ - for (pos = (head).ring; pos; pos = pos->next) + for (pos = (head).ring; (pos); pos = (pos)->next) #define hns3_get_handle(ndev) \ (((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle) -#define hns3_gl_usec_to_reg(int_gl) (int_gl >> 1) +#define hns3_gl_usec_to_reg(int_gl) ((int_gl) >> 1) #define hns3_gl_round_down(int_gl) round_down(int_gl, 2) -#define hns3_rl_usec_to_reg(int_rl) (int_rl >> 2) +#define hns3_rl_usec_to_reg(int_rl) ((int_rl) >> 2) #define hns3_rl_round_down(int_rl) round_down(int_rl, 4) void hns3_ethtool_set_ops(struct net_device *netdev); @@ -605,7 +605,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); int hns3_init_all_ring(struct hns3_nic_priv *priv); -int hns3_uninit_all_ring(struct hns3_nic_priv *priv); int hns3_nic_reset_all_ring(struct hnae3_handle *h); void hns3_fini_ring(struct hns3_enet_ring *ring); 
netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 79e0a9b14b68..adcec4ea7cb9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -456,7 +456,7 @@ static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats, data[ETH_GSTRING_LEN - 1] = '\0'; /* first, prepend the prefix string */ - n1 = scnprintf(data, MAX_PREFIX_SIZE, "%s%d_", + n1 = scnprintf(data, MAX_PREFIX_SIZE, "%s%u_", prefix, i); size_left = (ETH_GSTRING_LEN - 1) - n1; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 6546b47bef88..1bd0ddfaec4d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -189,38 +189,53 @@ static bool hclge_is_special_opcode(u16 opcode) return false; } -static int hclge_cmd_convert_err_code(u16 desc_ret) +struct errcode { + u32 imp_errcode; + int common_errno; +}; + +static void hclge_cmd_copy_desc(struct hclge_hw *hw, struct hclge_desc *desc, + int num) { - switch (desc_ret) { - case HCLGE_CMD_EXEC_SUCCESS: - return 0; - case HCLGE_CMD_NO_AUTH: - return -EPERM; - case HCLGE_CMD_NOT_SUPPORTED: - return -EOPNOTSUPP; - case HCLGE_CMD_QUEUE_FULL: - return -EXFULL; - case HCLGE_CMD_NEXT_ERR: - return -ENOSR; - case HCLGE_CMD_UNEXE_ERR: - return -ENOTBLK; - case HCLGE_CMD_PARA_ERR: - return -EINVAL; - case HCLGE_CMD_RESULT_ERR: - return -ERANGE; - case HCLGE_CMD_TIMEOUT: - return -ETIME; - case HCLGE_CMD_HILINK_ERR: - return -ENOLINK; - case HCLGE_CMD_QUEUE_ILLEGAL: - return -ENXIO; - case HCLGE_CMD_INVALID: - return -EBADR; - default: - return -EIO; + struct hclge_desc *desc_to_use; + int handle = 0; + + while (handle < num) { + desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; + 
*desc_to_use = desc[handle]; + (hw->cmq.csq.next_to_use)++; + if (hw->cmq.csq.next_to_use >= hw->cmq.csq.desc_num) + hw->cmq.csq.next_to_use = 0; + handle++; } } +static int hclge_cmd_convert_err_code(u16 desc_ret) +{ + struct errcode hclge_cmd_errcode[] = { + {HCLGE_CMD_EXEC_SUCCESS, 0}, + {HCLGE_CMD_NO_AUTH, -EPERM}, + {HCLGE_CMD_NOT_SUPPORTED, -EOPNOTSUPP}, + {HCLGE_CMD_QUEUE_FULL, -EXFULL}, + {HCLGE_CMD_NEXT_ERR, -ENOSR}, + {HCLGE_CMD_UNEXE_ERR, -ENOTBLK}, + {HCLGE_CMD_PARA_ERR, -EINVAL}, + {HCLGE_CMD_RESULT_ERR, -ERANGE}, + {HCLGE_CMD_TIMEOUT, -ETIME}, + {HCLGE_CMD_HILINK_ERR, -ENOLINK}, + {HCLGE_CMD_QUEUE_ILLEGAL, -ENXIO}, + {HCLGE_CMD_INVALID, -EBADR}, + }; + u32 errcode_count = ARRAY_SIZE(hclge_cmd_errcode); + u32 i; + + for (i = 0; i < errcode_count; i++) + if (hclge_cmd_errcode[i].imp_errcode == desc_ret) + return hclge_cmd_errcode[i].common_errno; + + return -EIO; +} + static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, int num, int ntc) { @@ -244,6 +259,44 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, return hclge_cmd_convert_err_code(desc_ret); } +static int hclge_cmd_check_result(struct hclge_hw *hw, struct hclge_desc *desc, + int num, int ntc) +{ + struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); + bool is_completed = false; + u32 timeout = 0; + int handle, ret; + + /** + * If the command is sync, wait for the firmware to write back, + * if multi descriptors to be sent, use the first one to check + */ + if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) { + do { + if (hclge_cmd_csq_done(hw)) { + is_completed = true; + break; + } + udelay(1); + timeout++; + } while (timeout < hw->cmq.tx_timeout); + } + + if (!is_completed) + ret = -EBADE; + else + ret = hclge_cmd_check_retval(hw, desc, num, ntc); + + /* Clean the command send queue */ + handle = hclge_cmd_csq_clean(hw); + if (handle < 0) + ret = handle; + else if (handle != num) + dev_warn(&hdev->pdev->dev, + "cleaned %d, need 
to clean %d\n", handle, num); + return ret; +} + /** * hclge_cmd_send - send command to command queue * @hw: pointer to the hw struct @@ -257,11 +310,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) { struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); struct hclge_cmq_ring *csq = &hw->cmq.csq; - struct hclge_desc *desc_to_use; - bool complete = false; - u32 timeout = 0; - int handle = 0; - int retval; + int ret; int ntc; spin_lock_bh(&hw->cmq.csq.lock); @@ -285,49 +334,17 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) * which will be use for hardware to write back */ ntc = hw->cmq.csq.next_to_use; - while (handle < num) { - desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; - *desc_to_use = desc[handle]; - (hw->cmq.csq.next_to_use)++; - if (hw->cmq.csq.next_to_use >= hw->cmq.csq.desc_num) - hw->cmq.csq.next_to_use = 0; - handle++; - } + + hclge_cmd_copy_desc(hw, desc, num); /* Write to hardware */ hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, hw->cmq.csq.next_to_use); - /** - * If the command is sync, wait for the firmware to write back, - * if multi descriptors to be sent, use the first one to check - */ - if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) { - do { - if (hclge_cmd_csq_done(hw)) { - complete = true; - break; - } - udelay(1); - timeout++; - } while (timeout < hw->cmq.tx_timeout); - } - - if (!complete) - retval = -EBADE; - else - retval = hclge_cmd_check_retval(hw, desc, num, ntc); - - /* Clean the command send queue */ - handle = hclge_cmd_csq_clean(hw); - if (handle < 0) - retval = handle; - else if (handle != num) - dev_warn(&hdev->pdev->dev, - "cleaned %d, need to clean %d\n", handle, num); + ret = hclge_cmd_check_result(hw, desc, num, ntc); spin_unlock_bh(&hw->cmq.csq.lock); - return retval; + return ret; } static void hclge_set_default_capability(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index e7c915eabc8a..ff52a65b4cff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1144,9 +1144,9 @@ static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) } #define hclge_write_dev(a, reg, value) \ - hclge_write_reg((a)->io_base, (reg), (value)) + hclge_write_reg((a)->io_base, reg, value) #define hclge_read_dev(a, reg) \ - hclge_read_reg((a)->io_base, (reg)) + hclge_read_reg((a)->io_base, reg) static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index e08d11b8ecf1..5bf5db91d16c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -176,29 +176,6 @@ static int hclge_map_update(struct hclge_dev *hdev) return hclge_rss_init_hw(hdev); } -static int hclge_client_setup_tc(struct hclge_dev *hdev) -{ - struct hclge_vport *vport = hdev->vport; - struct hnae3_client *client; - struct hnae3_handle *handle; - int ret; - u32 i; - - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - handle = &vport[i].nic; - client = handle->client; - - if (!client || !client->ops || !client->ops->setup_tc) - continue; - - ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc); - if (ret) - return ret; - } - - return 0; -} - static int hclge_notify_down_uinit(struct hclge_dev *hdev) { int ret; @@ -257,10 +234,6 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) if (ret) goto err_out; - ret = hclge_client_setup_tc(hdev); - if (ret) - goto err_out; - ret = hclge_notify_init_up(hdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 113efd4ae157..6b1d197df881 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -696,17 +696,16 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, u32 qset_mapping[HCLGE_BP_EXT_GRP_NUM]; struct hclge_qs_to_pri_link_cmd *map; struct hclge_tqp_tx_queue_tc_cmd *tc; + u16 group_id, queue_id, qset_id; enum hclge_opcode_type cmd; + u8 grp_num, pri_id, tc_id; struct hclge_desc desc; - int queue_id, group_id; - int tc_id, qset_id; - int pri_id, ret; u16 qs_id_l; u16 qs_id_h; - u8 grp_num; + int ret; u32 i; - ret = kstrtouint(cmd_buf, 0, &queue_id); + ret = kstrtou16(cmd_buf, 0, &queue_id); queue_id = (ret != 0) ? 0 : queue_id; cmd = HCLGE_OPC_TM_NQ_TO_QS_LINK; @@ -754,7 +753,7 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, tc_id = tc->tc_id & 0x7; dev_info(&hdev->pdev->dev, "queue_id | qset_id | pri_id | tc_id\n"); - dev_info(&hdev->pdev->dev, "%04d | %04d | %02d | %02d\n", + dev_info(&hdev->pdev->dev, "%04u | %04u | %02u | %02u\n", queue_id, qset_id, pri_id, tc_id); if (!hnae3_dev_dcb_supported(hdev)) { @@ -985,39 +984,39 @@ static void hclge_dbg_dump_qos_pri_map(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "pri_7_to_tc: 0x%x\n", pri_map->pri7_tc); } -static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev) +static int hclge_dbg_dump_tx_buf_cfg(struct hclge_dev *hdev) { struct hclge_tx_buff_alloc_cmd *tx_buf_cmd; - struct hclge_rx_priv_buff_cmd *rx_buf_cmd; - struct hclge_rx_priv_wl_buf *rx_priv_wl; - struct hclge_rx_com_wl *rx_packet_cnt; - struct hclge_rx_com_thrd *rx_com_thrd; - struct hclge_rx_com_wl *rx_com_wl; - enum hclge_opcode_type cmd; - struct hclge_desc desc[2]; + struct hclge_desc desc; int i, ret; - cmd = HCLGE_OPC_TX_BUFF_ALLOC; - hclge_cmd_setup_basic_desc(desc, cmd, true); - ret = hclge_cmd_send(&hdev->hw, desc, 1); + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - goto err_qos_cmd_send; + return 
ret; dev_info(&hdev->pdev->dev, "dump qos buf cfg\n"); - - tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc[0].data; + tx_buf_cmd = (struct hclge_tx_buff_alloc_cmd *)desc.data; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) dev_info(&hdev->pdev->dev, "tx_packet_buf_tc_%d: 0x%x\n", i, le16_to_cpu(tx_buf_cmd->tx_pkt_buff[i])); - cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC; - hclge_cmd_setup_basic_desc(desc, cmd, true); - ret = hclge_cmd_send(&hdev->hw, desc, 1); + return 0; +} + +static int hclge_dbg_dump_rx_priv_buf_cfg(struct hclge_dev *hdev) +{ + struct hclge_rx_priv_buff_cmd *rx_buf_cmd; + struct hclge_desc desc; + int i, ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - goto err_qos_cmd_send; + return ret; dev_info(&hdev->pdev->dev, "\n"); - rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc[0].data; + rx_buf_cmd = (struct hclge_rx_priv_buff_cmd *)desc.data; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) dev_info(&hdev->pdev->dev, "rx_packet_buf_tc_%d: 0x%x\n", i, le16_to_cpu(rx_buf_cmd->buf_num[i])); @@ -1025,43 +1024,61 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "rx_share_buf: 0x%x\n", le16_to_cpu(rx_buf_cmd->shared_buf)); - cmd = HCLGE_OPC_RX_COM_WL_ALLOC; - hclge_cmd_setup_basic_desc(desc, cmd, true); - ret = hclge_cmd_send(&hdev->hw, desc, 1); + return 0; +} + +static int hclge_dbg_dump_rx_common_wl_cfg(struct hclge_dev *hdev) +{ + struct hclge_rx_com_wl *rx_com_wl; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_COM_WL_ALLOC, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - goto err_qos_cmd_send; + return ret; - rx_com_wl = (struct hclge_rx_com_wl *)desc[0].data; + rx_com_wl = (struct hclge_rx_com_wl *)desc.data; dev_info(&hdev->pdev->dev, "\n"); dev_info(&hdev->pdev->dev, "rx_com_wl: high: 0x%x, low: 0x%x\n", le16_to_cpu(rx_com_wl->com_wl.high), le16_to_cpu(rx_com_wl->com_wl.low)); - cmd = 
HCLGE_OPC_RX_GBL_PKT_CNT; - hclge_cmd_setup_basic_desc(desc, cmd, true); - ret = hclge_cmd_send(&hdev->hw, desc, 1); + return 0; +} + +static int hclge_dbg_dump_rx_global_pkt_cnt(struct hclge_dev *hdev) +{ + struct hclge_rx_com_wl *rx_packet_cnt; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_GBL_PKT_CNT, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - goto err_qos_cmd_send; + return ret; - rx_packet_cnt = (struct hclge_rx_com_wl *)desc[0].data; + rx_packet_cnt = (struct hclge_rx_com_wl *)desc.data; dev_info(&hdev->pdev->dev, "rx_global_packet_cnt: high: 0x%x, low: 0x%x\n", le16_to_cpu(rx_packet_cnt->com_wl.high), le16_to_cpu(rx_packet_cnt->com_wl.low)); - dev_info(&hdev->pdev->dev, "\n"); - if (!hnae3_dev_dcb_supported(hdev)) { - dev_info(&hdev->pdev->dev, - "Only DCB-supported dev supports rx priv wl\n"); - return; - } - cmd = HCLGE_OPC_RX_PRIV_WL_ALLOC; - hclge_cmd_setup_basic_desc(&desc[0], cmd, true); + return 0; +} + +static int hclge_dbg_dump_rx_priv_wl_buf_cfg(struct hclge_dev *hdev) +{ + struct hclge_rx_priv_wl_buf *rx_priv_wl; + struct hclge_desc desc[2]; + int i, ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_RX_PRIV_WL_ALLOC, true); desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - hclge_cmd_setup_basic_desc(&desc[1], cmd, true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_RX_PRIV_WL_ALLOC, true); ret = hclge_cmd_send(&hdev->hw, desc, 2); if (ret) - goto err_qos_cmd_send; + return ret; rx_priv_wl = (struct hclge_rx_priv_wl_buf *)desc[0].data; for (i = 0; i < HCLGE_TC_NUM_ONE_DESC; i++) @@ -1078,13 +1095,21 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev) le16_to_cpu(rx_priv_wl->tc_wl[i].high), le16_to_cpu(rx_priv_wl->tc_wl[i].low)); - cmd = HCLGE_OPC_RX_COM_THRD_ALLOC; - hclge_cmd_setup_basic_desc(&desc[0], cmd, true); + return 0; +} + +static int hclge_dbg_dump_rx_common_threshold_cfg(struct hclge_dev *hdev) +{ + struct hclge_rx_com_thrd *rx_com_thrd; + struct 
hclge_desc desc[2]; + int i, ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_RX_COM_THRD_ALLOC, true); desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - hclge_cmd_setup_basic_desc(&desc[1], cmd, true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_RX_COM_THRD_ALLOC, true); ret = hclge_cmd_send(&hdev->hw, desc, 2); if (ret) - goto err_qos_cmd_send; + return ret; dev_info(&hdev->pdev->dev, "\n"); rx_com_thrd = (struct hclge_rx_com_thrd *)desc[0].data; @@ -1101,6 +1126,52 @@ static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev) i + HCLGE_TC_NUM_ONE_DESC, le16_to_cpu(rx_com_thrd->com_thrd[i].high), le16_to_cpu(rx_com_thrd->com_thrd[i].low)); + + return 0; +} + +static void hclge_dbg_dump_qos_buf_cfg(struct hclge_dev *hdev) +{ + enum hclge_opcode_type cmd; + int ret; + + cmd = HCLGE_OPC_TX_BUFF_ALLOC; + ret = hclge_dbg_dump_tx_buf_cfg(hdev); + if (ret) + goto err_qos_cmd_send; + + cmd = HCLGE_OPC_RX_PRIV_BUFF_ALLOC; + ret = hclge_dbg_dump_rx_priv_buf_cfg(hdev); + if (ret) + goto err_qos_cmd_send; + + cmd = HCLGE_OPC_RX_COM_WL_ALLOC; + ret = hclge_dbg_dump_rx_common_wl_cfg(hdev); + if (ret) + goto err_qos_cmd_send; + + cmd = HCLGE_OPC_RX_GBL_PKT_CNT; + ret = hclge_dbg_dump_rx_global_pkt_cnt(hdev); + if (ret) + goto err_qos_cmd_send; + + dev_info(&hdev->pdev->dev, "\n"); + if (!hnae3_dev_dcb_supported(hdev)) { + dev_info(&hdev->pdev->dev, + "Only DCB-supported dev supports rx priv wl\n"); + return; + } + + cmd = HCLGE_OPC_RX_PRIV_WL_ALLOC; + ret = hclge_dbg_dump_rx_priv_wl_buf_cfg(hdev); + if (ret) + goto err_qos_cmd_send; + + cmd = HCLGE_OPC_RX_COM_THRD_ALLOC; + ret = hclge_dbg_dump_rx_common_threshold_cfg(hdev); + if (ret) + goto err_qos_cmd_send; + return; err_qos_cmd_send: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 9ee55ee0487d..0ca7f1b984bf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ 
b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1073,7 +1073,7 @@ static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) * This function querys number of mpf and pf buffer descriptors. */ static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, - int *mpf_bd_num, int *pf_bd_num) + u32 *mpf_bd_num, u32 *pf_bd_num) { struct device *dev = &hdev->pdev->dev; u32 mpf_min_bd_num, pf_min_bd_num; @@ -1102,7 +1102,7 @@ static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); *pf_bd_num = le32_to_cpu(desc_bd.data[1]); if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { - dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n", + dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n", *mpf_bd_num, *pf_bd_num); return -EINVAL; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f5a988498cc3..34b744df6709 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -13,6 +13,7 @@ #include <linux/platform_device.h> #include <linux/if_vlan.h> #include <linux/crash_dump.h> +#include <net/ipv6.h> #include <net/rtnetlink.h> #include "hclge_cmd.h" #include "hclge_dcb.h" @@ -24,7 +25,7 @@ #include "hnae3.h" #define HCLGE_NAME "hclge" -#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) +#define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) #define HCLGE_BUF_SIZE_UNIT 256U @@ -626,7 +627,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data) for (i = 0; i < kinfo->num_tqps; i++) { struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i], struct hclge_tqp, q); - snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd", + snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd", tqp->index); buff = buff + ETH_GSTRING_LEN; } 
@@ -634,7 +635,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data) for (i = 0; i < kinfo->num_tqps; i++) { struct hclge_tqp *tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q); - snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd", + snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd", tqp->index); buff = buff + ETH_GSTRING_LEN; } @@ -928,7 +929,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) return 0; } -static int hclge_parse_speed(int speed_cmd, int *speed) +static int hclge_parse_speed(u8 speed_cmd, u32 *speed) { switch (speed_cmd) { case 6: @@ -4500,22 +4501,12 @@ static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc) return hash_sets; } -static int hclge_set_rss_tuple(struct hnae3_handle *handle, - struct ethtool_rxnfc *nfc) +static int hclge_init_rss_tuple_cmd(struct hclge_vport *vport, + struct ethtool_rxnfc *nfc, + struct hclge_rss_input_tuple_cmd *req) { - struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_rss_input_tuple_cmd *req; - struct hclge_desc desc; u8 tuple_sets; - int ret; - - if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | - RXH_L4_B_0_1 | RXH_L4_B_2_3)) - return -EINVAL; - - req = (struct hclge_rss_input_tuple_cmd *)desc.data; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false); req->ipv4_tcp_en = vport->rss_tuple_sets.ipv4_tcp_en; req->ipv4_udp_en = vport->rss_tuple_sets.ipv4_udp_en; @@ -4560,6 +4551,32 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, return -EINVAL; } + return 0; +} + +static int hclge_set_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_rss_input_tuple_cmd *req; + struct hclge_desc desc; + int ret; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + req = (struct hclge_rss_input_tuple_cmd *)desc.data; + 
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false); + + ret = hclge_init_rss_tuple_cmd(vport, nfc, req); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to init rss tuple cmd, ret = %d\n", ret); + return ret; + } + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -4579,52 +4596,69 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, return 0; } -static int hclge_get_rss_tuple(struct hnae3_handle *handle, - struct ethtool_rxnfc *nfc) +static int hclge_get_vport_rss_tuple(struct hclge_vport *vport, int flow_type, + u8 *tuple_sets) { - struct hclge_vport *vport = hclge_get_vport(handle); - u8 tuple_sets; - - nfc->data = 0; - - switch (nfc->flow_type) { + switch (flow_type) { case TCP_V4_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en; + *tuple_sets = vport->rss_tuple_sets.ipv4_tcp_en; break; case UDP_V4_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv4_udp_en; + *tuple_sets = vport->rss_tuple_sets.ipv4_udp_en; break; case TCP_V6_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en; + *tuple_sets = vport->rss_tuple_sets.ipv6_tcp_en; break; case UDP_V6_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv6_udp_en; + *tuple_sets = vport->rss_tuple_sets.ipv6_udp_en; break; case SCTP_V4_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en; + *tuple_sets = vport->rss_tuple_sets.ipv4_sctp_en; break; case SCTP_V6_FLOW: - tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en; + *tuple_sets = vport->rss_tuple_sets.ipv6_sctp_en; break; case IPV4_FLOW: case IPV6_FLOW: - tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT; + *tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT; break; default: return -EINVAL; } - if (!tuple_sets) - return 0; + return 0; +} + +static u64 hclge_convert_rss_tuple(u8 tuple_sets) +{ + u64 tuple_data = 0; if (tuple_sets & HCLGE_D_PORT_BIT) - nfc->data |= RXH_L4_B_2_3; + tuple_data |= RXH_L4_B_2_3; if (tuple_sets & HCLGE_S_PORT_BIT) - nfc->data |= RXH_L4_B_0_1; + tuple_data |= RXH_L4_B_0_1; if (tuple_sets & 
HCLGE_D_IP_BIT) - nfc->data |= RXH_IP_DST; + tuple_data |= RXH_IP_DST; if (tuple_sets & HCLGE_S_IP_BIT) - nfc->data |= RXH_IP_SRC; + tuple_data |= RXH_IP_SRC; + + return tuple_data; +} + +static int hclge_get_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + u8 tuple_sets; + int ret; + + nfc->data = 0; + + ret = hclge_get_vport_rss_tuple(vport, nfc->flow_type, &tuple_sets); + if (ret || !tuple_sets) + return ret; + + nfc->data = hclge_convert_rss_tuple(tuple_sets); return 0; } @@ -5508,12 +5542,10 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec, BIT(INNER_IP_TOS); /* check whether src/dst ip address used */ - if (!spec->ip6src[0] && !spec->ip6src[1] && - !spec->ip6src[2] && !spec->ip6src[3]) + if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) *unused_tuple |= BIT(INNER_SRC_IP); - if (!spec->ip6dst[0] && !spec->ip6dst[1] && - !spec->ip6dst[2] && !spec->ip6dst[3]) + if (ipv6_addr_any((struct in6_addr *)spec->ip6dst)) *unused_tuple |= BIT(INNER_DST_IP); if (!spec->psrc) @@ -5538,12 +5570,10 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec, BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); /* check whether src/dst ip address used */ - if (!spec->ip6src[0] && !spec->ip6src[1] && - !spec->ip6src[2] && !spec->ip6src[3]) + if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) *unused_tuple |= BIT(INNER_SRC_IP); - if (!spec->ip6dst[0] && !spec->ip6dst[1] && - !spec->ip6dst[2] && !spec->ip6dst[3]) + if (ipv6_addr_any((struct in6_addr *)spec->ip6dst)) *unused_tuple |= BIT(INNER_DST_IP); if (!spec->l4_proto) @@ -5595,7 +5625,7 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev, if (fs->m_ext.vlan_tci && be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) { dev_err(&hdev->pdev->dev, - "failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n", + "failed to config vlan_tci, invalid vlan_tci: %u, max is %d.\n", 
ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1); return -EINVAL; } @@ -8323,36 +8353,18 @@ static void hclge_sync_mac_table(struct hclge_dev *hdev) } } -void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, - enum HCLGE_MAC_ADDR_TYPE mac_type) +static void hclge_build_del_list(struct list_head *list, + bool is_del_list, + struct list_head *tmp_del_list) { - int (*unsync)(struct hclge_vport *vport, const unsigned char *addr); struct hclge_mac_node *mac_cfg, *tmp; - struct hclge_dev *hdev = vport->back; - struct list_head tmp_del_list, *list; - int ret; - - if (mac_type == HCLGE_MAC_ADDR_UC) { - list = &vport->uc_mac_list; - unsync = hclge_rm_uc_addr_common; - } else { - list = &vport->mc_mac_list; - unsync = hclge_rm_mc_addr_common; - } - - INIT_LIST_HEAD(&tmp_del_list); - - if (!is_del_list) - set_bit(vport->vport_id, hdev->vport_config_block); - - spin_lock_bh(&vport->mac_list_lock); list_for_each_entry_safe(mac_cfg, tmp, list, node) { switch (mac_cfg->state) { case HCLGE_MAC_TO_DEL: case HCLGE_MAC_ACTIVE: list_del(&mac_cfg->node); - list_add_tail(&mac_cfg->node, &tmp_del_list); + list_add_tail(&mac_cfg->node, tmp_del_list); break; case HCLGE_MAC_TO_ADD: if (is_del_list) { @@ -8362,10 +8374,18 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, break; } } +} - spin_unlock_bh(&vport->mac_list_lock); +static void hclge_unsync_del_list(struct hclge_vport *vport, + int (*unsync)(struct hclge_vport *vport, + const unsigned char *addr), + bool is_del_list, + struct list_head *tmp_del_list) +{ + struct hclge_mac_node *mac_cfg, *tmp; + int ret; - list_for_each_entry_safe(mac_cfg, tmp, &tmp_del_list, node) { + list_for_each_entry_safe(mac_cfg, tmp, tmp_del_list, node) { ret = unsync(vport, mac_cfg->mac_addr); if (!ret || ret == -ENOENT) { /* clear all mac addr from hardware, but remain these @@ -8383,6 +8403,35 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, mac_cfg->state = 
HCLGE_MAC_TO_DEL; } } +} + +void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list, + enum HCLGE_MAC_ADDR_TYPE mac_type) +{ + int (*unsync)(struct hclge_vport *vport, const unsigned char *addr); + struct hclge_dev *hdev = vport->back; + struct list_head tmp_del_list, *list; + + if (mac_type == HCLGE_MAC_ADDR_UC) { + list = &vport->uc_mac_list; + unsync = hclge_rm_uc_addr_common; + } else { + list = &vport->mc_mac_list; + unsync = hclge_rm_mc_addr_common; + } + + INIT_LIST_HEAD(&tmp_del_list); + + if (!is_del_list) + set_bit(vport->vport_id, hdev->vport_config_block); + + spin_lock_bh(&vport->mac_list_lock); + + hclge_build_del_list(list, is_del_list, &tmp_del_list); + + spin_unlock_bh(&vport->mac_list_lock); + + hclge_unsync_del_list(vport, unsync, is_del_list, &tmp_del_list); spin_lock_bh(&vport->mac_list_lock); @@ -8789,32 +8838,16 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) handle->netdev_flags &= ~HNAE3_VLAN_FLTR; } -static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, - bool is_kill, u16 vlan, - __be16 proto) +static int hclge_set_vf_vlan_filter_cmd(struct hclge_dev *hdev, u16 vfid, + bool is_kill, u16 vlan, + struct hclge_desc *desc) { - struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_vlan_filter_vf_cfg_cmd *req0; struct hclge_vlan_filter_vf_cfg_cmd *req1; - struct hclge_desc desc[2]; u8 vf_byte_val; u8 vf_byte_off; int ret; - /* if vf vlan table is full, firmware will close vf vlan filter, it - * is unable and unnecessary to add new vlan id to vf vlan filter. - * If spoof check is enable, and vf vlan is full, it shouldn't add - * new vlan, because tx packets with these vlan id will be dropped. 
- */ - if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) { - if (vport->vf_info.spoofchk && vlan) { - dev_err(&hdev->pdev->dev, - "Can't add vlan due to spoof check is on and vf vlan table is full\n"); - return -EPERM; - } - return 0; - } - hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); hclge_cmd_setup_basic_desc(&desc[1], @@ -8844,12 +8877,22 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, return ret; } + return 0; +} + +static int hclge_check_vf_vlan_cmd_status(struct hclge_dev *hdev, u16 vfid, + bool is_kill, struct hclge_desc *desc) +{ + struct hclge_vlan_filter_vf_cfg_cmd *req; + + req = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data; + if (!is_kill) { #define HCLGE_VF_VLAN_NO_ENTRY 2 - if (!req0->resp_code || req0->resp_code == 1) + if (!req->resp_code || req->resp_code == 1) return 0; - if (req0->resp_code == HCLGE_VF_VLAN_NO_ENTRY) { + if (req->resp_code == HCLGE_VF_VLAN_NO_ENTRY) { set_bit(vfid, hdev->vf_vlan_full); dev_warn(&hdev->pdev->dev, "vf vlan table is full, vf vlan filter is disabled\n"); @@ -8858,10 +8901,10 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, dev_err(&hdev->pdev->dev, "Add vf vlan filter fail, ret =%u.\n", - req0->resp_code); + req->resp_code); } else { #define HCLGE_VF_VLAN_DEL_NO_FOUND 1 - if (!req0->resp_code) + if (!req->resp_code) return 0; /* vf vlan filter is disabled when vf vlan table is full, @@ -8869,17 +8912,46 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, * Just return 0 without warning, avoid massive verbose * print logs when unload. 
*/ - if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) + if (req->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) return 0; dev_err(&hdev->pdev->dev, "Kill vf vlan filter fail, ret =%u.\n", - req0->resp_code); + req->resp_code); } return -EIO; } +static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, + bool is_kill, u16 vlan, + __be16 proto) +{ + struct hclge_vport *vport = &hdev->vport[vfid]; + struct hclge_desc desc[2]; + int ret; + + /* if vf vlan table is full, firmware will close vf vlan filter, it + * is unable and unnecessary to add new vlan id to vf vlan filter. + * If spoof check is enable, and vf vlan is full, it shouldn't add + * new vlan, because tx packets with these vlan id will be dropped. + */ + if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) { + if (vport->vf_info.spoofchk && vlan) { + dev_err(&hdev->pdev->dev, + "Can't add vlan due to spoof check is on and vf vlan table is full\n"); + return -EPERM; + } + return 0; + } + + ret = hclge_set_vf_vlan_filter_cmd(hdev, vfid, is_kill, vlan, desc); + if (ret) + return ret; + + return hclge_check_vf_vlan_cmd_status(hdev, vfid, is_kill, desc); +} + static int hclge_set_port_vlan_filter(struct hclge_dev *hdev, __be16 proto, u16 vlan_id, bool is_kill) { @@ -9830,12 +9902,19 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) { + struct hnae3_handle *handle = &vport->nic; struct hclge_dev *hdev = vport->back; int reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + if (queue_id >= handle->kinfo.num_tqps) { + dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", + queue_id); + return; + } + queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); @@ -10831,7 +10910,7 @@ static void hclge_reset_vf_rate(struct hclge_dev *hdev) } } -static int hclge_vf_rate_param_check(struct hclge_dev *hdev, int vf, +static int hclge_vf_rate_param_check(struct 
hclge_dev *hdev, int min_tx_rate, int max_tx_rate) { if (min_tx_rate != 0 || @@ -10852,7 +10931,7 @@ static int hclge_set_vf_rate(struct hnae3_handle *handle, int vf, struct hclge_dev *hdev = vport->back; int ret; - ret = hclge_vf_rate_param_check(hdev, vf, min_tx_rate, max_tx_rate); + ret = hclge_vf_rate_param_check(hdev, min_tx_rate, max_tx_rate); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 33b17a199e18..19d7f28773f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -46,15 +46,12 @@ #define HCLGE_CMDQ_RX_DEPTH_REG 0x27020 #define HCLGE_CMDQ_RX_TAIL_REG 0x27024 #define HCLGE_CMDQ_RX_HEAD_REG 0x27028 -#define HCLGE_CMDQ_INTR_SRC_REG 0x27100 #define HCLGE_CMDQ_INTR_STS_REG 0x27104 #define HCLGE_CMDQ_INTR_EN_REG 0x27108 #define HCLGE_CMDQ_INTR_GEN_REG 0x2710C /* bar registers for common func */ #define HCLGE_VECTOR0_OTER_EN_REG 0x20600 -#define HCLGE_RAS_OTHER_STS_REG 0x20B00 -#define HCLGE_FUNC_RESET_STS_REG 0x20C00 #define HCLGE_GRO_EN_REG 0x28000 /* bar registers for rcb */ @@ -728,7 +725,7 @@ struct hclge_vf_vlan_cfg { * x = (~k) & v * y = (k ^ ~v) & k */ -#define calc_x(x, k, v) ((x) = (~(k) & (v))) +#define calc_x(x, k, v) (x = ~(k) & (v)) #define calc_y(y, k, v) \ do { \ const typeof(k) _k_ = (k); \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 754c09ada901..51a36e74f088 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -56,7 +56,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, resp_pf_to_vf->msg.resp_status = resp; } else { dev_warn(&hdev->pdev->dev, - "failed to send response to VF, response status %d is out-of-bound\n", + "failed to send response to VF, response status %u is 
out-of-bound\n", resp); resp_pf_to_vf->msg.resp_status = EIO; } @@ -158,21 +158,31 @@ static int hclge_get_ring_chain_from_mbx( struct hclge_vport *vport) { struct hnae3_ring_chain_node *cur_chain, *new_chain; + struct hclge_dev *hdev = vport->back; int ring_num; - int i = 0; + int i; ring_num = req->msg.ring_num; if (ring_num > HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM) return -ENOMEM; + for (i = 0; i < ring_num; i++) { + if (req->msg.param[i].tqp_index >= vport->nic.kinfo.rss_size) { + dev_err(&hdev->pdev->dev, "tqp index(%u) is out of range(0-%u)\n", + req->msg.param[i].tqp_index, + vport->nic.kinfo.rss_size - 1); + return -EINVAL; + } + } + hnae3_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, - req->msg.param[i].ring_type); + req->msg.param[0].ring_type); ring_chain->tqp_index = hclge_get_queue_id(vport->nic.kinfo.tqp - [req->msg.param[i].tqp_index]); + [req->msg.param[0].tqp_index]); hnae3_set_field(ring_chain->int_gl_idx, HNAE3_RING_GL_IDX_M, - HNAE3_RING_GL_IDX_S, req->msg.param[i].int_gl_index); + HNAE3_RING_GL_IDX_S, req->msg.param[0].int_gl_index); cur_chain = ring_chain; @@ -597,6 +607,17 @@ static void hclge_get_rss_key(struct hclge_vport *vport, index = mbx_req->msg.data[0]; + /* Check the query index of rss_hash_key from VF, make sure no + * more than the size of rss_hash_key. 
+ */ + if (((index + 1) * HCLGE_RSS_MBX_RESP_LEN) > + sizeof(vport[0].rss_hash_key)) { + dev_warn(&hdev->pdev->dev, + "failed to get the rss hash key, the index(%u) invalid !\n", + index); + return; + } + memcpy(resp_msg->data, &hdev->vport[0].rss_hash_key[index * HCLGE_RSS_MBX_RESP_LEN], HCLGE_RSS_MBX_RESP_LEN); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 906d98e515aa..151afd1f0688 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -41,8 +41,9 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, struct hclge_shaper_ir_para *ir_para, u32 max_tm_rate) { +#define DEFAULT_SHAPER_IR_B 126 #define DIVISOR_CLK (1000 * 8) -#define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) +#define DEFAULT_DIVISOR_IR_B (DEFAULT_SHAPER_IR_B * DIVISOR_CLK) static const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { 6 * 256, /* Prioriy level */ @@ -69,10 +70,10 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, * ir_calc = ---------------- * 1000 * tick * 1 */ - ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick; + ir_calc = (DEFAULT_DIVISOR_IR_B + (tick >> 1) - 1) / tick; if (ir_calc == ir) { - ir_para->ir_b = 126; + ir_para->ir_b = DEFAULT_SHAPER_IR_B; ir_para->ir_u = 0; ir_para->ir_s = 0; @@ -81,7 +82,8 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, /* Increasing the denominator to select ir_s value */ while (ir_calc >= ir && ir) { ir_s_calc++; - ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc)); + ir_calc = DEFAULT_DIVISOR_IR_B / + (tick * (1 << ir_s_calc)); } ir_para->ir_b = (ir * tick * (1 << ir_s_calc) + @@ -92,12 +94,12 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, while (ir_calc < ir) { ir_u_calc++; - numerator = DIVISOR_IR_B_126 * (1 << ir_u_calc); + numerator = DEFAULT_DIVISOR_IR_B * (1 << ir_u_calc); ir_calc = (numerator + (tick >> 1)) / tick; } if (ir_calc == ir) { - 
ir_para->ir_b = 126; + ir_para->ir_b = DEFAULT_SHAPER_IR_B; } else { u32 denominator = DIVISOR_CLK * (1 << --ir_u_calc); ir_para->ir_b = (ir * tick + (denominator >> 1)) / diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index d33cb04acbef..b25d76023af0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -17,7 +17,7 @@ /* SP or DWRR */ #define HCLGE_TM_TX_SCHD_DWRR_MSK BIT(0) -#define HCLGE_TM_TX_SCHD_SP_MSK (0xFE) +#define HCLGE_TM_TX_SCHD_SP_MSK 0xFE #define HCLGE_ETHER_MAX_RATE 100000 @@ -214,8 +214,8 @@ struct hclge_pri_shaper_para { (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) #define hclge_tm_get_field(src, string) \ - hnae3_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \ - (HCLGE_TM_SHAP_##string##_LSH)) + hnae3_get_field((src), HCLGE_TM_SHAP_##string##_MSK, \ + HCLGE_TM_SHAP_##string##_LSH) int hclge_tm_schd_init(struct hclge_dev *hdev); int hclge_tm_vport_map_update(struct hclge_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 0f93c2dd890d..46700c427849 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -176,36 +176,111 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc, desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR); } +struct vf_errcode { + u32 imp_errcode; + int common_errno; +}; + +static void hclgevf_cmd_copy_desc(struct hclgevf_hw *hw, + struct hclgevf_desc *desc, int num) +{ + struct hclgevf_desc *desc_to_use; + int handle = 0; + + while (handle < num) { + desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; + *desc_to_use = desc[handle]; + (hw->cmq.csq.next_to_use)++; + if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num) + hw->cmq.csq.next_to_use = 0; + handle++; + } +} + 
static int hclgevf_cmd_convert_err_code(u16 desc_ret) { - switch (desc_ret) { - case HCLGEVF_CMD_EXEC_SUCCESS: - return 0; - case HCLGEVF_CMD_NO_AUTH: - return -EPERM; - case HCLGEVF_CMD_NOT_SUPPORTED: - return -EOPNOTSUPP; - case HCLGEVF_CMD_QUEUE_FULL: - return -EXFULL; - case HCLGEVF_CMD_NEXT_ERR: - return -ENOSR; - case HCLGEVF_CMD_UNEXE_ERR: - return -ENOTBLK; - case HCLGEVF_CMD_PARA_ERR: - return -EINVAL; - case HCLGEVF_CMD_RESULT_ERR: - return -ERANGE; - case HCLGEVF_CMD_TIMEOUT: - return -ETIME; - case HCLGEVF_CMD_HILINK_ERR: - return -ENOLINK; - case HCLGEVF_CMD_QUEUE_ILLEGAL: - return -ENXIO; - case HCLGEVF_CMD_INVALID: - return -EBADR; - default: - return -EIO; + struct vf_errcode hclgevf_cmd_errcode[] = { + {HCLGEVF_CMD_EXEC_SUCCESS, 0}, + {HCLGEVF_CMD_NO_AUTH, -EPERM}, + {HCLGEVF_CMD_NOT_SUPPORTED, -EOPNOTSUPP}, + {HCLGEVF_CMD_QUEUE_FULL, -EXFULL}, + {HCLGEVF_CMD_NEXT_ERR, -ENOSR}, + {HCLGEVF_CMD_UNEXE_ERR, -ENOTBLK}, + {HCLGEVF_CMD_PARA_ERR, -EINVAL}, + {HCLGEVF_CMD_RESULT_ERR, -ERANGE}, + {HCLGEVF_CMD_TIMEOUT, -ETIME}, + {HCLGEVF_CMD_HILINK_ERR, -ENOLINK}, + {HCLGEVF_CMD_QUEUE_ILLEGAL, -ENXIO}, + {HCLGEVF_CMD_INVALID, -EBADR}, + }; + u32 errcode_count = ARRAY_SIZE(hclgevf_cmd_errcode); + u32 i; + + for (i = 0; i < errcode_count; i++) + if (hclgevf_cmd_errcode[i].imp_errcode == desc_ret) + return hclgevf_cmd_errcode[i].common_errno; + + return -EIO; +} + +static int hclgevf_cmd_check_retval(struct hclgevf_hw *hw, + struct hclgevf_desc *desc, int num, int ntc) +{ + u16 opcode, desc_ret; + int handle; + + opcode = le16_to_cpu(desc[0].opcode); + for (handle = 0; handle < num; handle++) { + /* Get the result of hardware write back */ + desc[handle] = hw->cmq.csq.desc[ntc]; + ntc++; + if (ntc == hw->cmq.csq.desc_num) + ntc = 0; } + if (likely(!hclgevf_is_special_opcode(opcode))) + desc_ret = le16_to_cpu(desc[num - 1].retval); + else + desc_ret = le16_to_cpu(desc[0].retval); + hw->cmq.last_status = desc_ret; + + return 
hclgevf_cmd_convert_err_code(desc_ret); +} + +static int hclgevf_cmd_check_result(struct hclgevf_hw *hw, + struct hclgevf_desc *desc, int num, int ntc) +{ + struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev; + bool is_completed = false; + u32 timeout = 0; + int handle, ret; + + /* If the command is sync, wait for the firmware to write back, + * if multi descriptors to be sent, use the first one to check + */ + if (HCLGEVF_SEND_SYNC(le16_to_cpu(desc->flag))) { + do { + if (hclgevf_cmd_csq_done(hw)) { + is_completed = true; + break; + } + udelay(1); + timeout++; + } while (timeout < hw->cmq.tx_timeout); + } + + if (!is_completed) + ret = -EBADE; + else + ret = hclgevf_cmd_check_retval(hw, desc, num, ntc); + + /* Clean the command send queue */ + handle = hclgevf_cmd_csq_clean(hw); + if (handle < 0) + ret = handle; + else if (handle != num) + dev_warn(&hdev->pdev->dev, + "cleaned %d, need to clean %d\n", handle, num); + return ret; } /* hclgevf_cmd_send - send command to command queue @@ -220,13 +295,7 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) { struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev; struct hclgevf_cmq_ring *csq = &hw->cmq.csq; - struct hclgevf_desc *desc_to_use; - bool complete = false; - u32 timeout = 0; - int handle = 0; - int status = 0; - u16 retval; - u16 opcode; + int ret; int ntc; spin_lock_bh(&hw->cmq.csq.lock); @@ -250,67 +319,18 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) * which will be use for hardware to write back */ ntc = hw->cmq.csq.next_to_use; - opcode = le16_to_cpu(desc[0].opcode); - while (handle < num) { - desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; - *desc_to_use = desc[handle]; - (hw->cmq.csq.next_to_use)++; - if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num) - hw->cmq.csq.next_to_use = 0; - handle++; - } + + hclgevf_cmd_copy_desc(hw, desc, num); /* Write to hardware */ hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_TAIL_REG, 
hw->cmq.csq.next_to_use); - /* If the command is sync, wait for the firmware to write back, - * if multi descriptors to be sent, use the first one to check - */ - if (HCLGEVF_SEND_SYNC(le16_to_cpu(desc->flag))) { - do { - if (hclgevf_cmd_csq_done(hw)) - break; - udelay(1); - timeout++; - } while (timeout < hw->cmq.tx_timeout); - } - - if (hclgevf_cmd_csq_done(hw)) { - complete = true; - handle = 0; - - while (handle < num) { - /* Get the result of hardware write back */ - desc_to_use = &hw->cmq.csq.desc[ntc]; - desc[handle] = *desc_to_use; - - if (likely(!hclgevf_is_special_opcode(opcode))) - retval = le16_to_cpu(desc[handle].retval); - else - retval = le16_to_cpu(desc[0].retval); - - status = hclgevf_cmd_convert_err_code(retval); - hw->cmq.last_status = (enum hclgevf_cmd_status)retval; - ntc++; - handle++; - if (ntc == hw->cmq.csq.desc_num) - ntc = 0; - } - } - - if (!complete) - status = -EBADE; - - /* Clean the command send queue */ - handle = hclgevf_cmd_csq_clean(hw); - if (handle != num) - dev_warn(&hdev->pdev->dev, - "cleaned %d, need to clean %d\n", handle, num); + ret = hclgevf_cmd_check_result(hw, desc, num, ntc); spin_unlock_bh(&hw->cmq.csq.lock); - return status; + return ret; } static void hclgevf_set_default_capability(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index ac2864a7ce8d..8a37a22a176b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -216,8 +216,8 @@ struct hclgevf_rss_input_tuple_cmd { #define HCLGEVF_RSS_CFG_TBL_SIZE 16 struct hclgevf_rss_indirection_table_cmd { - u16 start_table_index; - u16 rss_set_bitmap; + __le16 start_table_index; + __le16 rss_set_bitmap; u8 rsv[4]; u8 rss_result[HCLGEVF_RSS_CFG_TBL_SIZE]; }; @@ -229,7 +229,7 @@ struct hclgevf_rss_indirection_table_cmd { #define HCLGEVF_RSS_TC_VALID_B 15 #define HCLGEVF_MAX_TC_NUM 8 struct 
hclgevf_rss_tc_mode_cmd { - u16 rss_tc_mode[HCLGEVF_MAX_TC_NUM]; + __le16 rss_tc_mode[HCLGEVF_MAX_TC_NUM]; u8 rsv[8]; }; @@ -278,7 +278,6 @@ struct hclgevf_cfg_tx_queue_pointer_cmd { #define HCLGEVF_NIC_CMQ_DESC_NUM 1024 #define HCLGEVF_NIC_CMQ_DESC_NUM_S 3 -#define HCLGEVF_NIC_CMDQ_INT_SRC_REG 0x27100 #define HCLGEVF_QUERY_DEV_SPECS_BD_NUM 4 @@ -315,9 +314,9 @@ static inline u32 hclgevf_read_reg(u8 __iomem *base, u32 reg) } #define hclgevf_write_dev(a, reg, value) \ - hclgevf_write_reg((a)->io_base, (reg), (value)) + hclgevf_write_reg((a)->io_base, reg, value) #define hclgevf_read_dev(a, reg) \ - hclgevf_read_reg((a)->io_base, (reg)) + hclgevf_read_reg((a)->io_base, reg) #define HCLGEVF_SEND_SYNC(flag) \ ((flag) & HCLGEVF_CMD_FLAG_NO_INTR) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index cdb1131ba239..700e068764c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -180,7 +180,7 @@ static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data) for (i = 0; i < kinfo->num_tqps; i++) { struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q); - snprintf(buff, ETH_GSTRING_LEN, "txq%d_pktnum_rcd", + snprintf(buff, ETH_GSTRING_LEN, "txq%u_pktnum_rcd", tqp->index); buff += ETH_GSTRING_LEN; } @@ -188,7 +188,7 @@ static u8 *hclgevf_tqps_get_strings(struct hnae3_handle *handle, u8 *data) for (i = 0; i < kinfo->num_tqps; i++) { struct hclgevf_tqp *tqp = container_of(kinfo->tqp[i], struct hclgevf_tqp, q); - snprintf(buff, ETH_GSTRING_LEN, "rxq%d_pktnum_rcd", + snprintf(buff, ETH_GSTRING_LEN, "rxq%u_pktnum_rcd", tqp->index); buff += ETH_GSTRING_LEN; } @@ -658,8 +658,9 @@ static int hclgevf_set_rss_indir_table(struct hclgevf_dev *hdev) for (i = 0; i < rss_cfg_tbl_num; i++) { hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INDIR_TABLE, false); - req->start_table_index = i 
* HCLGEVF_RSS_CFG_TBL_SIZE; - req->rss_set_bitmap = HCLGEVF_RSS_SET_BITMAP_MSK; + req->start_table_index = + cpu_to_le16(i * HCLGEVF_RSS_CFG_TBL_SIZE); + req->rss_set_bitmap = cpu_to_le16(HCLGEVF_RSS_SET_BITMAP_MSK); for (j = 0; j < HCLGEVF_RSS_CFG_TBL_SIZE; j++) req->rss_result[j] = indir[i * HCLGEVF_RSS_CFG_TBL_SIZE + j]; @@ -700,12 +701,16 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false); for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) { - hnae3_set_bit(req->rss_tc_mode[i], HCLGEVF_RSS_TC_VALID_B, + u16 mode = 0; + + hnae3_set_bit(mode, HCLGEVF_RSS_TC_VALID_B, (tc_valid[i] & 0x1)); - hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_SIZE_M, + hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M, HCLGEVF_RSS_TC_SIZE_S, tc_size[i]); - hnae3_set_field(req->rss_tc_mode[i], HCLGEVF_RSS_TC_OFFSET_M, + hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M, HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]); + + req->rss_tc_mode[i] = cpu_to_le16(mode); } status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) @@ -868,25 +873,13 @@ static u8 hclgevf_get_rss_hash_bits(struct ethtool_rxnfc *nfc) return hash_sets; } -static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, - struct ethtool_rxnfc *nfc) +static int hclgevf_init_rss_tuple_cmd(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc, + struct hclgevf_rss_input_tuple_cmd *req) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; - struct hclgevf_rss_input_tuple_cmd *req; - struct hclgevf_desc desc; u8 tuple_sets; - int ret; - - if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) - return -EOPNOTSUPP; - - if (nfc->data & - ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) - return -EINVAL; - - req = (struct hclgevf_rss_input_tuple_cmd *)desc.data; - hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INPUT_TUPLE, false); req->ipv4_tcp_en = rss_cfg->rss_tuple_sets.ipv4_tcp_en; 
req->ipv4_udp_en = rss_cfg->rss_tuple_sets.ipv4_udp_en; @@ -931,6 +924,35 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, return -EINVAL; } + return 0; +} + +static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; + struct hclgevf_rss_input_tuple_cmd *req; + struct hclgevf_desc desc; + int ret; + + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) + return -EOPNOTSUPP; + + if (nfc->data & + ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + req = (struct hclgevf_rss_input_tuple_cmd *)desc.data; + hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INPUT_TUPLE, false); + + ret = hclgevf_init_rss_tuple_cmd(handle, nfc, req); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to init rss tuple cmd, ret = %d\n", ret); + return ret; + } + ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -949,56 +971,73 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, return 0; } -static int hclgevf_get_rss_tuple(struct hnae3_handle *handle, - struct ethtool_rxnfc *nfc) +static int hclgevf_get_rss_tuple_by_flow_type(struct hclgevf_dev *hdev, + int flow_type, u8 *tuple_sets) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; - u8 tuple_sets; - - if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) - return -EOPNOTSUPP; - - nfc->data = 0; - - switch (nfc->flow_type) { + switch (flow_type) { case TCP_V4_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv4_tcp_en; + *tuple_sets = hdev->rss_cfg.rss_tuple_sets.ipv4_tcp_en; break; case UDP_V4_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv4_udp_en; + *tuple_sets = hdev->rss_cfg.rss_tuple_sets.ipv4_udp_en; break; case TCP_V6_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv6_tcp_en; + *tuple_sets = 
hdev->rss_cfg.rss_tuple_sets.ipv6_tcp_en; break; case UDP_V6_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv6_udp_en; + *tuple_sets = hdev->rss_cfg.rss_tuple_sets.ipv6_udp_en; break; case SCTP_V4_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv4_sctp_en; + *tuple_sets = hdev->rss_cfg.rss_tuple_sets.ipv4_sctp_en; break; case SCTP_V6_FLOW: - tuple_sets = rss_cfg->rss_tuple_sets.ipv6_sctp_en; + *tuple_sets = hdev->rss_cfg.rss_tuple_sets.ipv6_sctp_en; break; case IPV4_FLOW: case IPV6_FLOW: - tuple_sets = HCLGEVF_S_IP_BIT | HCLGEVF_D_IP_BIT; + *tuple_sets = HCLGEVF_S_IP_BIT | HCLGEVF_D_IP_BIT; break; default: return -EINVAL; } - if (!tuple_sets) - return 0; + return 0; +} + +static u64 hclgevf_convert_rss_tuple(u8 tuple_sets) +{ + u64 tuple_data = 0; if (tuple_sets & HCLGEVF_D_PORT_BIT) - nfc->data |= RXH_L4_B_2_3; + tuple_data |= RXH_L4_B_2_3; if (tuple_sets & HCLGEVF_S_PORT_BIT) - nfc->data |= RXH_L4_B_0_1; + tuple_data |= RXH_L4_B_0_1; if (tuple_sets & HCLGEVF_D_IP_BIT) - nfc->data |= RXH_IP_DST; + tuple_data |= RXH_IP_DST; if (tuple_sets & HCLGEVF_S_IP_BIT) - nfc->data |= RXH_IP_SRC; + tuple_data |= RXH_IP_SRC; + + return tuple_data; +} + +static int hclgevf_get_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + u8 tuple_sets; + int ret; + + if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) + return -EOPNOTSUPP; + + nfc->data = 0; + + ret = hclgevf_get_rss_tuple_by_flow_type(hdev, nfc->flow_type, + &tuple_sets); + if (ret || !tuple_sets) + return ret; + + nfc->data = hclgevf_convert_rss_tuple(tuple_sets); return 0; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 481bcedb391a..1774fbaab146 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -115,7 +115,7 @@ struct ibmvnic_stat { #define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ offsetof(struct ibmvnic_statistics, stat)) 
-#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + off))) +#define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) static const struct ibmvnic_stat ibmvnic_stats[] = { {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, @@ -1221,8 +1221,7 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); out: - /* - * If open fails due to a pending failover, set device state and + /* If open fails due to a pending failover, set device state and * return. Device operation will be handled by reset routine. */ if (rc && adapter->failover_pending) { @@ -1946,8 +1945,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) rtnl_lock(); - /* - * Now that we have the rtnl lock, clear any pending failover. + /* Now that we have the rtnl lock, clear any pending failover. * This will ensure ibmvnic_open() has either completed or will * block until failover is complete. */ @@ -2042,9 +2040,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, } rc = ibmvnic_login(netdev); - if (rc) { + if (rc) goto out; - } if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { rc = init_resources(adapter); @@ -2072,14 +2069,14 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = reset_tx_pools(adapter); if (rc) { netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n", - rc); + rc); goto out; } rc = reset_rx_pools(adapter); if (rc) { netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n", - rc); + rc); goto out; } } @@ -2249,8 +2246,7 @@ static void __ibmvnic_reset(struct work_struct *work) spin_unlock_irqrestore(&adapter->state_lock, flags); if (adapter->force_reset_recovery) { - /* - * Since we are doing a hard reset now, clear the + /* Since we are doing a hard reset now, clear the * failover_pending flag so we don't ignore any * future MOBILITY or other resets. 
*/ @@ -2322,8 +2318,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, unsigned long flags; int ret; - /* - * If failover is pending don't schedule any other reset. + /* If failover is pending don't schedule any other reset. * Instead let the failover complete. If there is already a * a failover reset scheduled, we will detect and drop the * duplicate reset when walking the ->rwi_list below. @@ -2338,7 +2333,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, if (adapter->state == VNIC_PROBING) { netdev_warn(netdev, "Adapter reset during probe\n"); - ret = adapter->init_done_rc = EAGAIN; + adapter->init_done_rc = EAGAIN; + ret = EAGAIN; goto err; } @@ -2445,9 +2441,8 @@ restart_poll: if (!pending_scrq(adapter, rx_scrq)) break; next = ibmvnic_next_scrq(adapter, rx_scrq); - rx_buff = - (struct ibmvnic_rx_buff *)be64_to_cpu(next-> - rx_comp.correlator); + rx_buff = (struct ibmvnic_rx_buff *) + be64_to_cpu(next->rx_comp.correlator); /* do error checking */ if (next->rx_comp.rc) { netdev_dbg(netdev, "rx buffer returned with rc %x\n", @@ -2638,9 +2633,9 @@ static void ibmvnic_get_drvinfo(struct net_device *netdev, { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); - strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, adapter->fw_version, + strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); + strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strscpy(info->fw_version, adapter->fw_version, sizeof(info->fw_version)); } @@ -2752,7 +2747,6 @@ static int ibmvnic_set_channels(struct net_device *netdev, channels->rx_count, channels->tx_count, adapter->req_rx_queues, adapter->req_tx_queues); return ret; - } static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -2841,8 +2835,8 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, return; for (i = 0; i < 
ARRAY_SIZE(ibmvnic_stats); i++) - data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter, - ibmvnic_stats[i].offset)); + data[i] = be64_to_cpu(IBMVNIC_GET_STAT + (adapter, ibmvnic_stats[i].offset)); for (j = 0; j < adapter->req_tx_queues; j++) { data[i] = adapter->tx_stats_buffers[j].packets; @@ -2882,6 +2876,7 @@ static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) return 0; } + static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -3151,7 +3146,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, /* H_EOI would fail with rc = H_FUNCTION when running * in XIVE mode which is expected, but not an error. */ - if (rc && (rc != H_FUNCTION)) + if (rc && rc != H_FUNCTION) dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); } @@ -3654,8 +3649,8 @@ static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, int rc; netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); if (!adapter->crq.active && crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { @@ -3860,15 +3855,15 @@ static int send_login(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_tx_queues; i++) { if (adapter->tx_scrq[i]) { - tx_list_p[i] = cpu_to_be64(adapter->tx_scrq[i]-> - crq_num); + tx_list_p[i] = + cpu_to_be64(adapter->tx_scrq[i]->crq_num); } } for (i = 0; i < adapter->req_rx_queues; i++) { if (adapter->rx_scrq[i]) { - rx_list_p[i] = cpu_to_be64(adapter->rx_scrq[i]-> - crq_num); + rx_list_p[i] = + cpu_to_be64(adapter->rx_scrq[i]->crq_num); } } @@ -3884,7 +3879,7 @@ static int send_login(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int 
*)(adapter->login_buf))[i]); + ((unsigned long *)(adapter->login_buf))[i]); } memset(&crq, 0, sizeof(crq)); @@ -4252,7 +4247,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(buf))[i]); + ((unsigned long *)(buf))[i]); netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); @@ -4411,8 +4406,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, case PARTIALSUCCESS: dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", *req_value, - (long int)be64_to_cpu(crq->request_capability_rsp. - number), name); + (long)be64_to_cpu(crq->request_capability_rsp.number), + name); if (be16_to_cpu(crq->request_capability_rsp.capability) == REQ_MTU) { @@ -4482,7 +4477,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", - ((unsigned long int *)(adapter->login_rsp_buf))[i]); + ((unsigned long *)(adapter->login_rsp_buf))[i]); } /* Sanity checks */ @@ -4825,8 +4820,8 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, long rc; netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", - (unsigned long int)cpu_to_be64(u64_crq[0]), - (unsigned long int)cpu_to_be64(u64_crq[1])); + (unsigned long)cpu_to_be64(u64_crq[0]), + (unsigned long)cpu_to_be64(u64_crq[1])); switch (gen_crq->first) { case IBMVNIC_CRQ_INIT_RSP: switch (gen_crq->cmd) { @@ -4842,7 +4837,22 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, complete(&adapter->init_done); adapter->init_done_rc = -EIO; } - ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); + if (rc && rc != -EBUSY) { + /* We 
were unable to schedule the failover + * reset either because the adapter was still + * probing (eg: during kexec) or we could not + * allocate memory. Clear the failover_pending + * flag since no one else will. We ignore + * EBUSY because it means either FAILOVER reset + * is already scheduled or the adapter is + * being removed. + */ + netdev_err(netdev, + "Error %ld scheduling failover reset\n", + rc); + adapter->failover_pending = false; + } break; case IBMVNIC_CRQ_INIT_COMPLETE: dev_info(dev, "Partner initialization complete\n"); @@ -5285,8 +5295,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->ethtool_ops = &ibmvnic_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - spin_lock_init(&adapter->stats_lock); - INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset, __ibmvnic_delayed_reset); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index c09c3f6bba9f..270d1cac86a4 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -845,6 +845,7 @@ struct ibmvnic_crq_queue { union ibmvnic_crq *msgs; int size, cur; dma_addr_t msg_token; + /* Used for serialization of msgs, cur */ spinlock_t lock; bool active; char name[32]; @@ -876,6 +877,7 @@ struct ibmvnic_sub_crq_queue { unsigned int irq; unsigned int pool_index; int scrq_num; + /* Used for serialization of msgs, cur */ spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; @@ -985,7 +987,6 @@ struct ibmvnic_adapter { struct ibmvnic_statistics stats; dma_addr_t stats_token; struct completion stats_done; - spinlock_t stats_lock; int replenish_no_mem; int replenish_add_buff_success; int replenish_add_buff_failure; @@ -1080,9 +1081,12 @@ struct ibmvnic_adapter { struct tasklet_struct tasklet; enum vnic_state state; + /* Used for serializatin of state field */ + spinlock_t state_lock; enum ibmvnic_reset_reason reset_reason; - spinlock_t 
rwi_lock; struct list_head rwi_list; + /* Used for serialization of rwi_list */ + spinlock_t rwi_lock; struct work_struct ibmvnic_reset; struct delayed_work ibmvnic_delayed_reset; unsigned long resetting; @@ -1096,7 +1100,4 @@ struct ibmvnic_adapter { struct ibmvnic_tunables desired; struct ibmvnic_tunables fallback; - - /* Used for serializatin of state field */ - spinlock_t state_lock; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 118473dfdcbd..cd53981fa5e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. */ #ifndef _I40E_H_ #define _I40E_H_ @@ -213,14 +213,18 @@ struct i40e_fdir_filter { struct hlist_node fdir_node; /* filter ipnut set */ u8 flow_type; - u8 ip4_proto; + u8 ipl4_proto; /* TX packet view of src and dst */ __be32 dst_ip; __be32 src_ip; + __be32 dst_ip6[4]; + __be32 src_ip6[4]; __be16 src_port; __be16 dst_port; __be32 sctp_v_tag; + __be16 vlan_etype; + __be16 vlan_tag; /* Flexible data to match within the packet payload */ __be16 flex_word; u16 flex_offset; @@ -289,6 +293,9 @@ struct i40e_cloud_filter { u8 tunnel_type; }; +#define I40E_DCB_PRIO_TYPE_STRICT 0 +#define I40E_DCB_PRIO_TYPE_ETS 1 +#define I40E_DCB_STRICT_PRIO_CREDITS 127 /* DCB per TC information data structure */ struct i40e_tc_info { u16 qoffset; /* Queue offset from base queue */ @@ -474,6 +481,11 @@ struct i40e_pf { u16 fd_sctp4_filter_cnt; u16 fd_ip4_filter_cnt; + u16 fd_tcp6_filter_cnt; + u16 fd_udp6_filter_cnt; + u16 fd_sctp6_filter_cnt; + u16 fd_ip6_filter_cnt; + /* Flexible filter table values that need to be programmed into * hardware, which expects L3 and L4 to be programmed separately. 
We * need to ensure that the values are in ascended order and don't have @@ -626,6 +638,8 @@ struct i40e_pf { u16 dcbx_cap; struct i40e_filter_control_settings filter_settings; + struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */ + struct i40e_dcbx_config tmp_cfg; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; @@ -1122,6 +1136,12 @@ bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); int i40e_count_filters(struct i40e_vsi *vsi); struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); void i40e_vlan_stripping_enable(struct i40e_vsi *vsi); +static inline bool i40e_is_sw_dcb(struct i40e_pf *pf) +{ + return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP); +} + +void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable); #ifdef CONFIG_I40E_DCB void i40e_dcbnl_flush_apps(struct i40e_pf *pf, struct i40e_dcbx_config *old_cfg, @@ -1131,6 +1151,8 @@ void i40e_dcbnl_setup(struct i40e_vsi *vsi); bool i40e_dcb_need_reconfig(struct i40e_pf *pf, struct i40e_dcbx_config *old_cfg, struct i40e_dcbx_config *new_cfg); +int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg); +int i40e_dcb_sw_default_config(struct i40e_pf *pf); #endif /* CONFIG_I40E_DCB */ void i40e_ptp_rx_hang(struct i40e_pf *pf); void i40e_ptp_tx_hang(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 1e960c3c7ef0..ce626eace692 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #ifndef _I40E_ADMINQ_CMD_H_ #define _I40E_ADMINQ_CMD_H_ @@ -1080,6 +1080,7 @@ struct i40e_aqc_add_remove_control_packet_filter { #define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC 0x0001 #define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP 0x0002 #define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX 0x0008 +#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX 0x0000 __le16 seid; __le16 queue; u8 reserved[2]; @@ -2184,6 +2185,14 @@ I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp); * Used to replace the local MIB of a given LLDP agent. e.g. DCBx */ struct i40e_aqc_lldp_set_local_mib { +#define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT 0 +#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK (1 << \ + SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT) +#define SET_LOCAL_MIB_AC_TYPE_LOCAL_MIB 0x0 +#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT (1) +#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_MASK (1 << \ + SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT) +#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS 0x1 u8 type; u8 reserved0; __le16 length; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index adc9e4fa4789..ec19e18305ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. */ #include "i40e.h" #include "i40e_type.h" @@ -3662,6 +3662,46 @@ i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type, } /** + * i40e_aq_set_lldp_mib - Set the LLDP MIB + * @hw: pointer to the hw struct + * @mib_type: Local, Remote or both Local and Remote MIBs + * @buff: pointer to a user supplied buffer to store the MIB block + * @buff_size: size of the buffer (in bytes) + * @cmd_details: pointer to command details structure or NULL + * + * Set the LLDP MIB. 
+ **/ +enum i40e_status_code +i40e_aq_set_lldp_mib(struct i40e_hw *hw, + u8 mib_type, void *buff, u16 buff_size, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aqc_lldp_set_local_mib *cmd; + enum i40e_status_code status; + struct i40e_aq_desc desc; + + cmd = (struct i40e_aqc_lldp_set_local_mib *)&desc.params.raw; + if (buff_size == 0 || !buff) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_lldp_set_local_mib); + /* Indirect Command */ + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + if (buff_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc.datalen = cpu_to_le16(buff_size); + + cmd->type = mib_type; + cmd->length = cpu_to_le16(buff_size); + cmd->address_high = cpu_to_le32(upper_32_bits((uintptr_t)buff)); + cmd->address_low = cpu_to_le32(lower_32_bits((uintptr_t)buff)); + + status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details); + return status; +} + +/** * i40e_aq_cfg_lldp_mib_change_event * @hw: pointer to the hw struct * @enable_update: Enable or Disable event posting @@ -4480,6 +4520,29 @@ static i40e_status i40e_aq_alternate_read(struct i40e_hw *hw, } /** + * i40e_aq_suspend_port_tx + * @hw: pointer to the hardware structure + * @seid: port seid + * @cmd_details: pointer to command details structure or NULL + * + * Suspend port's Tx traffic + **/ +i40e_status i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aqc_tx_sched_ind *cmd; + struct i40e_aq_desc desc; + i40e_status status; + + cmd = (struct i40e_aqc_tx_sched_ind *)&desc.params.raw; + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_suspend_port_tx); + cmd->vsi_seid = cpu_to_le16(seid); + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_resume_port_tx * @hw: pointer to the hardware structure * @cmd_details: pointer to command details structure or NULL diff 
--git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 9de503c5f99b..7b73a279d46e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. */ #include "i40e_adminq.h" #include "i40e_prototype.h" @@ -933,6 +933,953 @@ i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change) } /** + * i40e_get_fw_lldp_status + * @hw: pointer to the hw struct + * @lldp_status: pointer to the status enum + * + * Get status of FW Link Layer Discovery Protocol (LLDP) Agent. + * Status of agent is reported via @lldp_status parameter. + **/ +enum i40e_status_code +i40e_get_fw_lldp_status(struct i40e_hw *hw, + enum i40e_get_fw_lldp_status_resp *lldp_status) +{ + struct i40e_virt_mem mem; + i40e_status ret; + u8 *lldpmib; + + if (!lldp_status) + return I40E_ERR_PARAM; + + /* Allocate buffer for the LLDPDU */ + ret = i40e_allocate_virt_mem(hw, &mem, I40E_LLDPDU_SIZE); + if (ret) + return ret; + + lldpmib = (u8 *)mem.va; + ret = i40e_aq_get_lldp_mib(hw, 0, 0, (void *)lldpmib, + I40E_LLDPDU_SIZE, NULL, NULL, NULL); + + if (!ret) { + *lldp_status = I40E_GET_FW_LLDP_STATUS_ENABLED; + } else if (hw->aq.asq_last_status == I40E_AQ_RC_ENOENT) { + /* MIB is not available yet but the agent is running */ + *lldp_status = I40E_GET_FW_LLDP_STATUS_ENABLED; + ret = 0; + } else if (hw->aq.asq_last_status == I40E_AQ_RC_EPERM) { + *lldp_status = I40E_GET_FW_LLDP_STATUS_DISABLED; + ret = 0; + } + + i40e_free_virt_mem(hw, &mem); + return ret; +} + +/** + * i40e_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format + * @tlv: Fill the ETS config data in IEEE format + * @dcbcfg: Local store which holds the DCB Config + * + * Prepare IEEE 802.1Qaz ETS CFG TLV + **/ +static void i40e_add_ieee_ets_tlv(struct i40e_lldp_org_tlv *tlv, + struct i40e_dcbx_config *dcbcfg) 
+{ + u8 priority0, priority1, maxtcwilling = 0; + struct i40e_dcb_ets_config *etscfg; + u16 offset = 0, typelength, i; + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + + typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) | + I40E_IEEE_ETS_TLV_LENGTH); + tlv->typelength = htons(typelength); + + ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) | + I40E_IEEE_SUBTYPE_ETS_CFG); + tlv->ouisubtype = htonl(ouisubtype); + + /* First Octet post subtype + * -------------------------- + * |will-|CBS | Re- | Max | + * |ing | |served| TCs | + * -------------------------- + * |1bit | 1bit|3 bits|3bits| + */ + etscfg = &dcbcfg->etscfg; + if (etscfg->willing) + maxtcwilling = BIT(I40E_IEEE_ETS_WILLING_SHIFT); + maxtcwilling |= etscfg->maxtcs & I40E_IEEE_ETS_MAXTC_MASK; + buf[offset] = maxtcwilling; + + /* Move offset to Priority Assignment Table */ + offset++; + + /* Priority Assignment Table (4 octets) + * Octets:| 1 | 2 | 3 | 4 | + * ----------------------------------------- + * |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7| + * ----------------------------------------- + * Bits:|7 4|3 0|7 4|3 0|7 4|3 0|7 4|3 0| + * ----------------------------------------- + */ + for (i = 0; i < 4; i++) { + priority0 = etscfg->prioritytable[i * 2] & 0xF; + priority1 = etscfg->prioritytable[i * 2 + 1] & 0xF; + buf[offset] = (priority0 << I40E_IEEE_ETS_PRIO_1_SHIFT) | + priority1; + offset++; + } + + /* TC Bandwidth Table (8 octets) + * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + * --------------------------------- + * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7| + * --------------------------------- + */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + buf[offset++] = etscfg->tcbwtable[i]; + + /* TSA Assignment Table (8 octets) + * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + * --------------------------------- + * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7| + * --------------------------------- + */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + buf[offset++] = etscfg->tsatable[i]; +} + 
+/** + * i40e_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format + * @tlv: Fill ETS Recommended TLV in IEEE format + * @dcbcfg: Local store which holds the DCB Config + * + * Prepare IEEE 802.1Qaz ETS REC TLV + **/ +static void i40e_add_ieee_etsrec_tlv(struct i40e_lldp_org_tlv *tlv, + struct i40e_dcbx_config *dcbcfg) +{ + struct i40e_dcb_ets_config *etsrec; + u16 offset = 0, typelength, i; + u8 priority0, priority1; + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + + typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) | + I40E_IEEE_ETS_TLV_LENGTH); + tlv->typelength = htons(typelength); + + ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) | + I40E_IEEE_SUBTYPE_ETS_REC); + tlv->ouisubtype = htonl(ouisubtype); + + etsrec = &dcbcfg->etsrec; + /* First Octet is reserved */ + /* Move offset to Priority Assignment Table */ + offset++; + + /* Priority Assignment Table (4 octets) + * Octets:| 1 | 2 | 3 | 4 | + * ----------------------------------------- + * |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7| + * ----------------------------------------- + * Bits:|7 4|3 0|7 4|3 0|7 4|3 0|7 4|3 0| + * ----------------------------------------- + */ + for (i = 0; i < 4; i++) { + priority0 = etsrec->prioritytable[i * 2] & 0xF; + priority1 = etsrec->prioritytable[i * 2 + 1] & 0xF; + buf[offset] = (priority0 << I40E_IEEE_ETS_PRIO_1_SHIFT) | + priority1; + offset++; + } + + /* TC Bandwidth Table (8 octets) + * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + * --------------------------------- + * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7| + * --------------------------------- + */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + buf[offset++] = etsrec->tcbwtable[i]; + + /* TSA Assignment Table (8 octets) + * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | + * --------------------------------- + * |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7| + * --------------------------------- + */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + buf[offset++] = etsrec->tsatable[i]; +} + +/** 
+ * i40e_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format + * @tlv: Fill PFC TLV in IEEE format + * @dcbcfg: Local store to get PFC CFG data + * + * Prepare IEEE 802.1Qaz PFC CFG TLV + **/ +static void i40e_add_ieee_pfc_tlv(struct i40e_lldp_org_tlv *tlv, + struct i40e_dcbx_config *dcbcfg) +{ + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + u16 typelength; + + typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) | + I40E_IEEE_PFC_TLV_LENGTH); + tlv->typelength = htons(typelength); + + ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) | + I40E_IEEE_SUBTYPE_PFC_CFG); + tlv->ouisubtype = htonl(ouisubtype); + + /* ---------------------------------------- + * |will-|MBC | Re- | PFC | PFC Enable | + * |ing | |served| cap | | + * ----------------------------------------- + * |1bit | 1bit|2 bits|4bits| 1 octet | + */ + if (dcbcfg->pfc.willing) + buf[0] = BIT(I40E_IEEE_PFC_WILLING_SHIFT); + + if (dcbcfg->pfc.mbc) + buf[0] |= BIT(I40E_IEEE_PFC_MBC_SHIFT); + + buf[0] |= dcbcfg->pfc.pfccap & 0xF; + buf[1] = dcbcfg->pfc.pfcenable; +} + +/** + * i40e_add_ieee_app_pri_tlv - Prepare APP TLV in IEEE format + * @tlv: Fill APP TLV in IEEE format + * @dcbcfg: Local store to get APP CFG data + * + * Prepare IEEE 802.1Qaz APP CFG TLV + **/ +static void i40e_add_ieee_app_pri_tlv(struct i40e_lldp_org_tlv *tlv, + struct i40e_dcbx_config *dcbcfg) +{ + u16 typelength, length, offset = 0; + u8 priority, selector, i = 0; + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + + /* No APP TLVs then just return */ + if (dcbcfg->numapps == 0) + return; + ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) | + I40E_IEEE_SUBTYPE_APP_PRI); + tlv->ouisubtype = htonl(ouisubtype); + + /* Move offset to App Priority Table */ + offset++; + /* Application Priority Table (3 octets) + * Octets:| 1 | 2 | 3 | + * ----------------------------------------- + * |Priority|Rsrvd| Sel | Protocol ID | + * ----------------------------------------- + * Bits:|23 21|20 19|18 16|15 
0| + * ----------------------------------------- + */ + while (i < dcbcfg->numapps) { + priority = dcbcfg->app[i].priority & 0x7; + selector = dcbcfg->app[i].selector & 0x7; + buf[offset] = (priority << I40E_IEEE_APP_PRIO_SHIFT) | selector; + buf[offset + 1] = (dcbcfg->app[i].protocolid >> 0x8) & 0xFF; + buf[offset + 2] = dcbcfg->app[i].protocolid & 0xFF; + /* Move to next app */ + offset += 3; + i++; + if (i >= I40E_DCBX_MAX_APPS) + break; + } + /* length includes size of ouisubtype + 1 reserved + 3*numapps */ + length = sizeof(tlv->ouisubtype) + 1 + (i * 3); + typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) | + (length & 0x1FF)); + tlv->typelength = htons(typelength); +} + +/** + * i40e_add_dcb_tlv - Add all IEEE TLVs + * @tlv: pointer to org tlv + * @dcbcfg: pointer to modified dcbx config structure * + * @tlvid: tlv id to be added + * add tlv information + **/ +static void i40e_add_dcb_tlv(struct i40e_lldp_org_tlv *tlv, + struct i40e_dcbx_config *dcbcfg, + u16 tlvid) +{ + switch (tlvid) { + case I40E_IEEE_TLV_ID_ETS_CFG: + i40e_add_ieee_ets_tlv(tlv, dcbcfg); + break; + case I40E_IEEE_TLV_ID_ETS_REC: + i40e_add_ieee_etsrec_tlv(tlv, dcbcfg); + break; + case I40E_IEEE_TLV_ID_PFC_CFG: + i40e_add_ieee_pfc_tlv(tlv, dcbcfg); + break; + case I40E_IEEE_TLV_ID_APP_PRI: + i40e_add_ieee_app_pri_tlv(tlv, dcbcfg); + break; + default: + break; + } +} + +/** + * i40e_set_dcb_config - Set the local LLDP MIB to FW + * @hw: pointer to the hw struct + * + * Set DCB configuration to the Firmware + **/ +i40e_status i40e_set_dcb_config(struct i40e_hw *hw) +{ + struct i40e_dcbx_config *dcbcfg; + struct i40e_virt_mem mem; + u8 mib_type, *lldpmib; + i40e_status ret; + u16 miblen; + + /* update the hw local config */ + dcbcfg = &hw->local_dcbx_config; + /* Allocate the LLDPDU */ + ret = i40e_allocate_virt_mem(hw, &mem, I40E_LLDPDU_SIZE); + if (ret) + return ret; + + mib_type = SET_LOCAL_MIB_AC_TYPE_LOCAL_MIB; + if (dcbcfg->app_mode == I40E_DCBX_APPS_NON_WILLING) { + 
mib_type |= SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS << + SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT; + } + lldpmib = (u8 *)mem.va; + i40e_dcb_config_to_lldp(lldpmib, &miblen, dcbcfg); + ret = i40e_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen, NULL); + + i40e_free_virt_mem(hw, &mem); + return ret; +} + +/** + * i40e_dcb_config_to_lldp - Convert Dcbconfig to MIB format + * @lldpmib: pointer to mib to be output + * @miblen: pointer to u16 for length of lldpmib + * @dcbcfg: store for LLDPDU data + * + * send DCB configuration to FW + **/ +i40e_status i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen, + struct i40e_dcbx_config *dcbcfg) +{ + u16 length, offset = 0, tlvid, typelength; + struct i40e_lldp_org_tlv *tlv; + + tlv = (struct i40e_lldp_org_tlv *)lldpmib; + tlvid = I40E_TLV_ID_START; + do { + i40e_add_dcb_tlv(tlv, dcbcfg, tlvid++); + typelength = ntohs(tlv->typelength); + length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> + I40E_LLDP_TLV_LEN_SHIFT); + if (length) + offset += length + I40E_IEEE_TLV_HEADER_LENGTH; + /* END TLV or beyond LLDPDU size */ + if (tlvid >= I40E_TLV_ID_END_OF_LLDPPDU || + offset >= I40E_LLDPDU_SIZE) + break; + /* Move to next TLV */ + if (length) + tlv = (struct i40e_lldp_org_tlv *)((char *)tlv + + sizeof(tlv->typelength) + length); + } while (tlvid < I40E_TLV_ID_END_OF_LLDPPDU); + *miblen = offset; + return I40E_SUCCESS; +} + +/** + * i40e_dcb_hw_rx_fifo_config + * @hw: pointer to the hw struct + * @ets_mode: Strict Priority or Round Robin mode + * @non_ets_mode: Strict Priority or Round Robin + * @max_exponent: Exponent to calculate max refill credits + * @lltc_map: Low latency TC bitmap + * + * Configure HW Rx FIFO as part of DCB configuration. 
+ **/ +void i40e_dcb_hw_rx_fifo_config(struct i40e_hw *hw, + enum i40e_dcb_arbiter_mode ets_mode, + enum i40e_dcb_arbiter_mode non_ets_mode, + u32 max_exponent, + u8 lltc_map) +{ + u32 reg = rd32(hw, I40E_PRTDCB_RETSC); + + reg &= ~I40E_PRTDCB_RETSC_ETS_MODE_MASK; + reg |= ((u32)ets_mode << I40E_PRTDCB_RETSC_ETS_MODE_SHIFT) & + I40E_PRTDCB_RETSC_ETS_MODE_MASK; + + reg &= ~I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK; + reg |= ((u32)non_ets_mode << I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT) & + I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK; + + reg &= ~I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK; + reg |= (max_exponent << I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT) & + I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK; + + reg &= ~I40E_PRTDCB_RETSC_LLTC_MASK; + reg |= (lltc_map << I40E_PRTDCB_RETSC_LLTC_SHIFT) & + I40E_PRTDCB_RETSC_LLTC_MASK; + wr32(hw, I40E_PRTDCB_RETSC, reg); +} + +/** + * i40e_dcb_hw_rx_cmd_monitor_config + * @hw: pointer to the hw struct + * @num_tc: Total number of traffic class + * @num_ports: Total number of ports on device + * + * Configure HW Rx command monitor as part of DCB configuration. 
+ **/ +void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw, + u8 num_tc, u8 num_ports) +{ + u32 threshold; + u32 fifo_size; + u32 reg; + + /* Set the threshold and fifo_size based on number of ports */ + switch (num_ports) { + case 1: + threshold = I40E_DCB_1_PORT_THRESHOLD; + fifo_size = I40E_DCB_1_PORT_FIFO_SIZE; + break; + case 2: + if (num_tc > 4) { + threshold = I40E_DCB_2_PORT_THRESHOLD_HIGH_NUM_TC; + fifo_size = I40E_DCB_2_PORT_FIFO_SIZE_HIGH_NUM_TC; + } else { + threshold = I40E_DCB_2_PORT_THRESHOLD_LOW_NUM_TC; + fifo_size = I40E_DCB_2_PORT_FIFO_SIZE_LOW_NUM_TC; + } + break; + case 4: + if (num_tc > 4) { + threshold = I40E_DCB_4_PORT_THRESHOLD_HIGH_NUM_TC; + fifo_size = I40E_DCB_4_PORT_FIFO_SIZE_HIGH_NUM_TC; + } else { + threshold = I40E_DCB_4_PORT_THRESHOLD_LOW_NUM_TC; + fifo_size = I40E_DCB_4_PORT_FIFO_SIZE_LOW_NUM_TC; + } + break; + default: + i40e_debug(hw, I40E_DEBUG_DCB, "Invalid num_ports %u.\n", + (u32)num_ports); + return; + } + + /* The hardware manual describes setting up of I40E_PRT_SWR_PM_THR + * based on the number of ports and traffic classes for a given port as + * part of DCB configuration. + */ + reg = rd32(hw, I40E_PRT_SWR_PM_THR); + reg &= ~I40E_PRT_SWR_PM_THR_THRESHOLD_MASK; + reg |= (threshold << I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT) & + I40E_PRT_SWR_PM_THR_THRESHOLD_MASK; + wr32(hw, I40E_PRT_SWR_PM_THR, reg); + + reg = rd32(hw, I40E_PRTDCB_RPPMC); + reg &= ~I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK; + reg |= (fifo_size << I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT) & + I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK; + wr32(hw, I40E_PRTDCB_RPPMC, reg); +} + +/** + * i40e_dcb_hw_pfc_config + * @hw: pointer to the hw struct + * @pfc_en: Bitmap of PFC enabled priorities + * @prio_tc: priority to tc assignment indexed by priority + * + * Configure HW Priority Flow Controller as part of DCB configuration. 
+ **/ +void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, + u8 pfc_en, u8 *prio_tc) +{ + u16 refresh_time = (u16)I40E_DEFAULT_PAUSE_TIME / 2; + u32 link_speed = hw->phy.link_info.link_speed; + u8 first_pfc_prio = 0; + u8 num_pfc_tc = 0; + u8 tc2pfc = 0; + u32 reg; + u8 i; + + /* Get Number of PFC TCs and TC2PFC map */ + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { + if (pfc_en & BIT(i)) { + if (!first_pfc_prio) + first_pfc_prio = i; + /* Set bit for the PFC TC */ + tc2pfc |= BIT(prio_tc[i]); + num_pfc_tc++; + } + } + + switch (link_speed) { + case I40E_LINK_SPEED_10GB: + reg = rd32(hw, I40E_PRTDCB_MFLCN); + reg |= BIT(I40E_PRTDCB_MFLCN_DPF_SHIFT) & + I40E_PRTDCB_MFLCN_DPF_MASK; + reg &= ~I40E_PRTDCB_MFLCN_RFCE_MASK; + reg &= ~I40E_PRTDCB_MFLCN_RPFCE_MASK; + if (pfc_en) { + reg |= BIT(I40E_PRTDCB_MFLCN_RPFCM_SHIFT) & + I40E_PRTDCB_MFLCN_RPFCM_MASK; + reg |= ((u32)pfc_en << I40E_PRTDCB_MFLCN_RPFCE_SHIFT) & + I40E_PRTDCB_MFLCN_RPFCE_MASK; + } + wr32(hw, I40E_PRTDCB_MFLCN, reg); + + reg = rd32(hw, I40E_PRTDCB_FCCFG); + reg &= ~I40E_PRTDCB_FCCFG_TFCE_MASK; + if (pfc_en) + reg |= (I40E_DCB_PFC_ENABLED << + I40E_PRTDCB_FCCFG_TFCE_SHIFT) & + I40E_PRTDCB_FCCFG_TFCE_MASK; + wr32(hw, I40E_PRTDCB_FCCFG, reg); + + /* FCTTV and FCRTV to be set by default */ + break; + case I40E_LINK_SPEED_40GB: + reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP); + reg &= ~I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK; + wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP, reg); + + reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP); + reg &= ~I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK; + reg |= BIT(I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT) & + I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_MASK; + wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP, reg); + + reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE); + reg &= ~I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK; + reg |= ((u32)pfc_en << + I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT) & + I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK; + wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE, 
reg); + + reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE); + reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK; + reg |= ((u32)pfc_en << + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT) & + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK; + wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE, reg); + + for (i = 0; i < I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX; i++) { + reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i)); + reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK; + if (pfc_en) { + reg |= ((u32)refresh_time << + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT) & + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK; + } + wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i), reg); + } + /* PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA default value is 0xFFFF + * for all user priorities + */ + break; + } + + reg = rd32(hw, I40E_PRTDCB_TC2PFC); + reg &= ~I40E_PRTDCB_TC2PFC_TC2PFC_MASK; + reg |= ((u32)tc2pfc << I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT) & + I40E_PRTDCB_TC2PFC_TC2PFC_MASK; + wr32(hw, I40E_PRTDCB_TC2PFC, reg); + + reg = rd32(hw, I40E_PRTDCB_RUP); + reg &= ~I40E_PRTDCB_RUP_NOVLANUP_MASK; + reg |= ((u32)first_pfc_prio << I40E_PRTDCB_RUP_NOVLANUP_SHIFT) & + I40E_PRTDCB_RUP_NOVLANUP_MASK; + wr32(hw, I40E_PRTDCB_RUP, reg); + + reg = rd32(hw, I40E_PRTDCB_TDPMC); + reg &= ~I40E_PRTDCB_TDPMC_TCPM_MODE_MASK; + if (num_pfc_tc > I40E_DCB_PFC_FORCED_NUM_TC) { + reg |= BIT(I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT) & + I40E_PRTDCB_TDPMC_TCPM_MODE_MASK; + } + wr32(hw, I40E_PRTDCB_TDPMC, reg); + + reg = rd32(hw, I40E_PRTDCB_TCPMC); + reg &= ~I40E_PRTDCB_TCPMC_TCPM_MODE_MASK; + if (num_pfc_tc > I40E_DCB_PFC_FORCED_NUM_TC) { + reg |= BIT(I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT) & + I40E_PRTDCB_TCPMC_TCPM_MODE_MASK; + } + wr32(hw, I40E_PRTDCB_TCPMC, reg); +} + +/** + * i40e_dcb_hw_set_num_tc + * @hw: pointer to the hw struct + * @num_tc: number of traffic classes + * + * Configure number of traffic classes in HW + **/ +void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc) 
+{ + u32 reg = rd32(hw, I40E_PRTDCB_GENC); + + reg &= ~I40E_PRTDCB_GENC_NUMTC_MASK; + reg |= ((u32)num_tc << I40E_PRTDCB_GENC_NUMTC_SHIFT) & + I40E_PRTDCB_GENC_NUMTC_MASK; + wr32(hw, I40E_PRTDCB_GENC, reg); +} + +/** + * i40e_dcb_hw_get_num_tc + * @hw: pointer to the hw struct + * + * Returns number of traffic classes configured in HW + **/ +u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw) +{ + u32 reg = rd32(hw, I40E_PRTDCB_GENC); + + return (u8)((reg & I40E_PRTDCB_GENC_NUMTC_MASK) >> + I40E_PRTDCB_GENC_NUMTC_SHIFT); +} + +/** + * i40e_dcb_hw_rx_ets_bw_config + * @hw: pointer to the hw struct + * @bw_share: Bandwidth share indexed per traffic class + * @mode: Strict Priority or Round Robin mode between UP sharing same + * traffic class + * @prio_type: TC is ETS enabled or strict priority + * + * Configure HW Rx ETS bandwidth as part of DCB configuration. + **/ +void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, + u8 *mode, u8 *prio_type) +{ + u32 reg; + u8 i; + + for (i = 0; i <= I40E_PRTDCB_RETSTCC_MAX_INDEX; i++) { + reg = rd32(hw, I40E_PRTDCB_RETSTCC(i)); + reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK | + I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK | + I40E_PRTDCB_RETSTCC_ETSTC_SHIFT); + reg |= ((u32)bw_share[i] << I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT) & + I40E_PRTDCB_RETSTCC_BWSHARE_MASK; + reg |= ((u32)mode[i] << I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT) & + I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK; + reg |= ((u32)prio_type[i] << I40E_PRTDCB_RETSTCC_ETSTC_SHIFT) & + I40E_PRTDCB_RETSTCC_ETSTC_MASK; + wr32(hw, I40E_PRTDCB_RETSTCC(i), reg); + } +} + +/** + * i40e_dcb_hw_rx_ets_bw_config + * @hw: pointer to the hw struct + * @prio_tc: priority to tc assignment indexed by priority + * + * Configure HW Rx UP2TC map as part of DCB configuration. 
+ **/ +void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc) +{ + u32 reg = rd32(hw, I40E_PRTDCB_RUP2TC); +#define I40E_UP2TC_REG(val, i) \ + (((val) << I40E_PRTDCB_RUP2TC_UP##i##TC_SHIFT) & \ + I40E_PRTDCB_RUP2TC_UP##i##TC_MASK) + + reg |= I40E_UP2TC_REG(prio_tc[0], 0); + reg |= I40E_UP2TC_REG(prio_tc[1], 1); + reg |= I40E_UP2TC_REG(prio_tc[2], 2); + reg |= I40E_UP2TC_REG(prio_tc[3], 3); + reg |= I40E_UP2TC_REG(prio_tc[4], 4); + reg |= I40E_UP2TC_REG(prio_tc[5], 5); + reg |= I40E_UP2TC_REG(prio_tc[6], 6); + reg |= I40E_UP2TC_REG(prio_tc[7], 7); + + wr32(hw, I40E_PRTDCB_RUP2TC, reg); +} + +/** + * i40e_dcb_hw_calculate_pool_sizes - configure dcb pool sizes + * @hw: pointer to the hw struct + * @num_ports: Number of available ports on the device + * @eee_enabled: EEE enabled for the given port + * @pfc_en: Bit map of PFC enabled traffic classes + * @mfs_tc: Array of max frame size for each traffic class + * @pb_cfg: pointer to packet buffer configuration + * + * Calculate the shared and dedicated per TC pool sizes, + * watermarks and threshold values. 
+ **/ +void i40e_dcb_hw_calculate_pool_sizes(struct i40e_hw *hw, + u8 num_ports, bool eee_enabled, + u8 pfc_en, u32 *mfs_tc, + struct i40e_rx_pb_config *pb_cfg) +{ + u32 pool_size[I40E_MAX_TRAFFIC_CLASS]; + u32 high_wm[I40E_MAX_TRAFFIC_CLASS]; + u32 low_wm[I40E_MAX_TRAFFIC_CLASS]; + u32 total_pool_size = 0; + int shared_pool_size; /* Need signed variable */ + u32 port_pb_size; + u32 mfs_max; + u32 pcirtt; + u8 i; + + /* Get the MFS(max) for the port */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (mfs_tc[i] > mfs_max) + mfs_max = mfs_tc[i]; + } + + pcirtt = I40E_BT2B(I40E_PCIRTT_LINK_SPEED_10G); + + /* Calculate effective Rx PB size per port */ + port_pb_size = I40E_DEVICE_RPB_SIZE / num_ports; + if (eee_enabled) + port_pb_size -= I40E_BT2B(I40E_EEE_TX_LPI_EXIT_TIME); + port_pb_size -= mfs_max; + + /* Step 1 Calculating tc pool/shared pool sizes and watermarks */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (pfc_en & BIT(i)) { + low_wm[i] = (I40E_DCB_WATERMARK_START_FACTOR * + mfs_tc[i]) + pcirtt; + high_wm[i] = low_wm[i]; + high_wm[i] += ((mfs_max > I40E_MAX_FRAME_SIZE) + ? 
mfs_max : I40E_MAX_FRAME_SIZE); + pool_size[i] = high_wm[i]; + pool_size[i] += I40E_BT2B(I40E_STD_DV_TC(mfs_max, + mfs_tc[i])); + } else { + low_wm[i] = 0; + pool_size[i] = (I40E_DCB_WATERMARK_START_FACTOR * + mfs_tc[i]) + pcirtt; + high_wm[i] = pool_size[i]; + } + total_pool_size += pool_size[i]; + } + + shared_pool_size = port_pb_size - total_pool_size; + if (shared_pool_size > 0) { + pb_cfg->shared_pool_size = shared_pool_size; + pb_cfg->shared_pool_high_wm = shared_pool_size; + pb_cfg->shared_pool_low_wm = 0; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + pb_cfg->shared_pool_low_thresh[i] = 0; + pb_cfg->shared_pool_high_thresh[i] = shared_pool_size; + pb_cfg->tc_pool_size[i] = pool_size[i]; + pb_cfg->tc_pool_high_wm[i] = high_wm[i]; + pb_cfg->tc_pool_low_wm[i] = low_wm[i]; + } + + } else { + i40e_debug(hw, I40E_DEBUG_DCB, + "The shared pool size for the port is negative %d.\n", + shared_pool_size); + } +} + +/** + * i40e_dcb_hw_rx_pb_config + * @hw: pointer to the hw struct + * @old_pb_cfg: Existing Rx Packet buffer configuration + * @new_pb_cfg: New Rx Packet buffer configuration + * + * Program the Rx Packet Buffer registers. + **/ +void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, + struct i40e_rx_pb_config *old_pb_cfg, + struct i40e_rx_pb_config *new_pb_cfg) +{ + u32 old_val; + u32 new_val; + u32 reg; + u8 i; + + /* The Rx Packet buffer register programming needs to be done in a + * certain order and the following code is based on that + * requirement. + */ + + /* Program the shared pool low water mark per port if decreasing */ + old_val = old_pb_cfg->shared_pool_low_wm; + new_val = new_pb_cfg->shared_pool_low_wm; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_SLW); + reg &= ~I40E_PRTRPB_SLW_SLW_MASK; + reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) & + I40E_PRTRPB_SLW_SLW_MASK; + wr32(hw, I40E_PRTRPB_SLW, reg); + } + + /* Program the shared pool low threshold and tc pool + * low water mark per TC that are decreasing. 
+ */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + old_val = old_pb_cfg->shared_pool_low_thresh[i]; + new_val = new_pb_cfg->shared_pool_low_thresh[i]; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_SLT(i)); + reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) & + I40E_PRTRPB_SLT_SLT_TCN_MASK; + wr32(hw, I40E_PRTRPB_SLT(i), reg); + } + + old_val = old_pb_cfg->tc_pool_low_wm[i]; + new_val = new_pb_cfg->tc_pool_low_wm[i]; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_DLW(i)); + reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) & + I40E_PRTRPB_DLW_DLW_TCN_MASK; + wr32(hw, I40E_PRTRPB_DLW(i), reg); + } + } + + /* Program the shared pool high water mark per port if decreasing */ + old_val = old_pb_cfg->shared_pool_high_wm; + new_val = new_pb_cfg->shared_pool_high_wm; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_SHW); + reg &= ~I40E_PRTRPB_SHW_SHW_MASK; + reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) & + I40E_PRTRPB_SHW_SHW_MASK; + wr32(hw, I40E_PRTRPB_SHW, reg); + } + + /* Program the shared pool high threshold and tc pool + * high water mark per TC that are decreasing. 
+ */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + old_val = old_pb_cfg->shared_pool_high_thresh[i]; + new_val = new_pb_cfg->shared_pool_high_thresh[i]; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_SHT(i)); + reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) & + I40E_PRTRPB_SHT_SHT_TCN_MASK; + wr32(hw, I40E_PRTRPB_SHT(i), reg); + } + + old_val = old_pb_cfg->tc_pool_high_wm[i]; + new_val = new_pb_cfg->tc_pool_high_wm[i]; + if (new_val < old_val) { + reg = rd32(hw, I40E_PRTRPB_DHW(i)); + reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) & + I40E_PRTRPB_DHW_DHW_TCN_MASK; + wr32(hw, I40E_PRTRPB_DHW(i), reg); + } + } + + /* Write Dedicated Pool Sizes per TC */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + new_val = new_pb_cfg->tc_pool_size[i]; + reg = rd32(hw, I40E_PRTRPB_DPS(i)); + reg &= ~I40E_PRTRPB_DPS_DPS_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_DPS_DPS_TCN_SHIFT) & + I40E_PRTRPB_DPS_DPS_TCN_MASK; + wr32(hw, I40E_PRTRPB_DPS(i), reg); + } + + /* Write Shared Pool Size per port */ + new_val = new_pb_cfg->shared_pool_size; + reg = rd32(hw, I40E_PRTRPB_SPS); + reg &= ~I40E_PRTRPB_SPS_SPS_MASK; + reg |= (new_val << I40E_PRTRPB_SPS_SPS_SHIFT) & + I40E_PRTRPB_SPS_SPS_MASK; + wr32(hw, I40E_PRTRPB_SPS, reg); + + /* Program the shared pool low water mark per port if increasing */ + old_val = old_pb_cfg->shared_pool_low_wm; + new_val = new_pb_cfg->shared_pool_low_wm; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_SLW); + reg &= ~I40E_PRTRPB_SLW_SLW_MASK; + reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) & + I40E_PRTRPB_SLW_SLW_MASK; + wr32(hw, I40E_PRTRPB_SLW, reg); + } + + /* Program the shared pool low threshold and tc pool + * low water mark per TC that are increasing. 
+ */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + old_val = old_pb_cfg->shared_pool_low_thresh[i]; + new_val = new_pb_cfg->shared_pool_low_thresh[i]; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_SLT(i)); + reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) & + I40E_PRTRPB_SLT_SLT_TCN_MASK; + wr32(hw, I40E_PRTRPB_SLT(i), reg); + } + + old_val = old_pb_cfg->tc_pool_low_wm[i]; + new_val = new_pb_cfg->tc_pool_low_wm[i]; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_DLW(i)); + reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) & + I40E_PRTRPB_DLW_DLW_TCN_MASK; + wr32(hw, I40E_PRTRPB_DLW(i), reg); + } + } + + /* Program the shared pool high water mark per port if increasing */ + old_val = old_pb_cfg->shared_pool_high_wm; + new_val = new_pb_cfg->shared_pool_high_wm; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_SHW); + reg &= ~I40E_PRTRPB_SHW_SHW_MASK; + reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) & + I40E_PRTRPB_SHW_SHW_MASK; + wr32(hw, I40E_PRTRPB_SHW, reg); + } + + /* Program the shared pool high threshold and tc pool + * high water mark per TC that are increasing. 
+ */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + old_val = old_pb_cfg->shared_pool_high_thresh[i]; + new_val = new_pb_cfg->shared_pool_high_thresh[i]; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_SHT(i)); + reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) & + I40E_PRTRPB_SHT_SHT_TCN_MASK; + wr32(hw, I40E_PRTRPB_SHT(i), reg); + } + + old_val = old_pb_cfg->tc_pool_high_wm[i]; + new_val = new_pb_cfg->tc_pool_high_wm[i]; + if (new_val > old_val) { + reg = rd32(hw, I40E_PRTRPB_DHW(i)); + reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK; + reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) & + I40E_PRTRPB_DHW_DHW_TCN_MASK; + wr32(hw, I40E_PRTRPB_DHW(i), reg); + } + } +} + +/** * _i40e_read_lldp_cfg - generic read of LLDP Configuration data from NVM * @hw: pointer to the HW structure * @lldp_cfg: pointer to hold lldp configuration variables diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h index 2b1a2e81ac73..2370ceecb061 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h @@ -1,13 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #ifndef _I40E_DCB_H_ #define _I40E_DCB_H_ #include "i40e_type.h" +#define I40E_DCBX_STATUS_NOT_STARTED 0 #define I40E_DCBX_STATUS_IN_PROGRESS 1 #define I40E_DCBX_STATUS_DONE 2 +#define I40E_DCBX_STATUS_MULTIPLE_PEERS 3 #define I40E_DCBX_STATUS_DISABLED 7 #define I40E_TLV_TYPE_END 0 @@ -22,6 +24,7 @@ #define I40E_CEE_DCBX_OUI 0x001b21 #define I40E_CEE_DCBX_TYPE 2 +#define I40E_CEE_SUBTYPE_CTRL 1 #define I40E_CEE_SUBTYPE_PG_CFG 2 #define I40E_CEE_SUBTYPE_PFC_CFG 3 #define I40E_CEE_SUBTYPE_APP_PRI 4 @@ -64,6 +67,8 @@ #define I40E_IEEE_TSA_ETS 2 /* Defines for IEEE PFC TLV */ +#define I40E_DCB_PFC_ENABLED 2 +#define I40E_DCB_PFC_FORCED_NUM_TC 2 #define I40E_IEEE_PFC_CAP_SHIFT 0 #define I40E_IEEE_PFC_CAP_MASK (0xF << I40E_IEEE_PFC_CAP_SHIFT) #define I40E_IEEE_PFC_MBC_SHIFT 6 @@ -77,9 +82,30 @@ #define I40E_IEEE_APP_PRIO_SHIFT 5 #define I40E_IEEE_APP_PRIO_MASK (0x7 << I40E_IEEE_APP_PRIO_SHIFT) +/* TLV definitions for preparing MIB */ +#define I40E_TLV_ID_CHASSIS_ID 0 +#define I40E_TLV_ID_PORT_ID 1 +#define I40E_TLV_ID_TIME_TO_LIVE 2 +#define I40E_IEEE_TLV_ID_ETS_CFG 3 +#define I40E_IEEE_TLV_ID_ETS_REC 4 +#define I40E_IEEE_TLV_ID_PFC_CFG 5 +#define I40E_IEEE_TLV_ID_APP_PRI 6 +#define I40E_TLV_ID_END_OF_LLDPPDU 7 +#define I40E_TLV_ID_START I40E_IEEE_TLV_ID_ETS_CFG -#pragma pack(1) +#define I40E_IEEE_TLV_HEADER_LENGTH 2 +#define I40E_IEEE_ETS_TLV_LENGTH 25 +#define I40E_IEEE_PFC_TLV_LENGTH 6 +#define I40E_IEEE_APP_TLV_LENGTH 11 + +/* Defines for default SW DCB config */ +#define I40E_IEEE_DEFAULT_ETS_TCBW 100 +#define I40E_IEEE_DEFAULT_ETS_WILLING 1 +#define I40E_IEEE_DEFAULT_PFC_WILLING 1 +#define I40E_IEEE_DEFAULT_NUM_APPS 1 +#define I40E_IEEE_DEFAULT_APP_PRIO 3 +#pragma pack(1) /* IEEE 802.1AB LLDP Organization specific TLV */ struct i40e_lldp_org_tlv { __be16 typelength; @@ -102,7 +128,9 @@ struct i40e_cee_ctrl_tlv { struct i40e_cee_feat_tlv { struct i40e_cee_tlv_hdr hdr; u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */ +#define I40E_CEE_FEAT_TLV_ENABLE_MASK 
0x80 #define I40E_CEE_FEAT_TLV_WILLING_MASK 0x40 +#define I40E_CEE_FEAT_TLV_ERR_MASK 0x20 u8 subtype; u8 tlvinfo[1]; }; @@ -116,13 +144,140 @@ struct i40e_cee_app_prio { }; #pragma pack() +enum i40e_get_fw_lldp_status_resp { + I40E_GET_FW_LLDP_STATUS_DISABLED = 0, + I40E_GET_FW_LLDP_STATUS_ENABLED = 1 +}; + +/* Data structures to pass for SW DCBX */ +struct i40e_rx_pb_config { + u32 shared_pool_size; + u32 shared_pool_high_wm; + u32 shared_pool_low_wm; + u32 shared_pool_high_thresh[I40E_MAX_TRAFFIC_CLASS]; + u32 shared_pool_low_thresh[I40E_MAX_TRAFFIC_CLASS]; + u32 tc_pool_size[I40E_MAX_TRAFFIC_CLASS]; + u32 tc_pool_high_wm[I40E_MAX_TRAFFIC_CLASS]; + u32 tc_pool_low_wm[I40E_MAX_TRAFFIC_CLASS]; +}; + +enum i40e_dcb_arbiter_mode { + I40E_DCB_ARB_MODE_STRICT_PRIORITY = 0, + I40E_DCB_ARB_MODE_ROUND_ROBIN = 1 +}; + +#define I40E_DCB_DEFAULT_MAX_EXPONENT 0xB +#define I40E_DEFAULT_PAUSE_TIME 0xffff +#define I40E_MAX_FRAME_SIZE 4608 /* 4.5 KB */ + +#define I40E_DEVICE_RPB_SIZE 968000 /* 968 KB */ + +/* BitTimes (BT) conversion */ +#define I40E_BT2KB(BT) (((BT) + (8 * 1024 - 1)) / (8 * 1024)) +#define I40E_B2BT(BT) ((BT) * 8) +#define I40E_BT2B(BT) (((BT) + (8 - 1)) / 8) + +/* Max Frame(TC) = MFS(max) + MFS(TC) */ +#define I40E_MAX_FRAME_TC(mfs_max, mfs_tc) I40E_B2BT((mfs_max) + (mfs_tc)) + +/* EEE Tx LPI Exit time in Bit Times */ +#define I40E_EEE_TX_LPI_EXIT_TIME 142500 + +/* PCI Round Trip Time in Bit Times */ +#define I40E_PCIRTT_LINK_SPEED_10G 20000 +#define I40E_PCIRTT_BYTE_LINK_SPEED_20G 40000 +#define I40E_PCIRTT_BYTE_LINK_SPEED_40G 80000 + +/* PFC Frame Delay Bit Times */ +#define I40E_PFC_FRAME_DELAY 672 + +/* Worst case Cable (10GBase-T) Delay Bit Times */ +#define I40E_CABLE_DELAY 5556 + +/* Higher Layer Delay @10G Bit Times */ +#define I40E_HIGHER_LAYER_DELAY_10G 6144 + +/* Interface Delays in Bit Times */ +/* TODO: Add for other link speeds 20G/40G/etc. 
*/ +#define I40E_INTERFACE_DELAY_10G_MAC_CONTROL 8192 +#define I40E_INTERFACE_DELAY_10G_MAC 8192 +#define I40E_INTERFACE_DELAY_10G_RS 8192 + +#define I40E_INTERFACE_DELAY_XGXS 2048 +#define I40E_INTERFACE_DELAY_XAUI 2048 + +#define I40E_INTERFACE_DELAY_10G_BASEX_PCS 2048 +#define I40E_INTERFACE_DELAY_10G_BASER_PCS 3584 +#define I40E_INTERFACE_DELAY_LX4_PMD 512 +#define I40E_INTERFACE_DELAY_CX4_PMD 512 +#define I40E_INTERFACE_DELAY_SERIAL_PMA 512 +#define I40E_INTERFACE_DELAY_PMD 512 + +#define I40E_INTERFACE_DELAY_10G_BASET 25600 + +/* Hardware RX DCB config related defines */ +#define I40E_DCB_1_PORT_THRESHOLD 0xF +#define I40E_DCB_1_PORT_FIFO_SIZE 0x10 +#define I40E_DCB_2_PORT_THRESHOLD_LOW_NUM_TC 0xF +#define I40E_DCB_2_PORT_FIFO_SIZE_LOW_NUM_TC 0x10 +#define I40E_DCB_2_PORT_THRESHOLD_HIGH_NUM_TC 0xC +#define I40E_DCB_2_PORT_FIFO_SIZE_HIGH_NUM_TC 0x8 +#define I40E_DCB_4_PORT_THRESHOLD_LOW_NUM_TC 0x9 +#define I40E_DCB_4_PORT_FIFO_SIZE_LOW_NUM_TC 0x8 +#define I40E_DCB_4_PORT_THRESHOLD_HIGH_NUM_TC 0x6 +#define I40E_DCB_4_PORT_FIFO_SIZE_HIGH_NUM_TC 0x4 +#define I40E_DCB_WATERMARK_START_FACTOR 0x2 + +/* delay values for with 10G BaseT in Bit Times */ +#define I40E_INTERFACE_DELAY_10G_COPPER \ + (I40E_INTERFACE_DELAY_10G_MAC + (2 * I40E_INTERFACE_DELAY_XAUI) \ + + I40E_INTERFACE_DELAY_10G_BASET) +#define I40E_DV_TC(mfs_max, mfs_tc) \ + ((2 * I40E_MAX_FRAME_TC(mfs_max, mfs_tc)) \ + + I40E_PFC_FRAME_DELAY \ + + (2 * I40E_CABLE_DELAY) \ + + (2 * I40E_INTERFACE_DELAY_10G_COPPER) \ + + I40E_HIGHER_LAYER_DELAY_10G) +static inline u32 I40E_STD_DV_TC(u32 mfs_max, u32 mfs_tc) +{ + return I40E_DV_TC(mfs_max, mfs_tc) + I40E_B2BT(mfs_max); +} + +/* APIs for SW DCBX */ +void i40e_dcb_hw_rx_fifo_config(struct i40e_hw *hw, + enum i40e_dcb_arbiter_mode ets_mode, + enum i40e_dcb_arbiter_mode non_ets_mode, + u32 max_exponent, u8 lltc_map); +void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw, + u8 num_tc, u8 num_ports); +void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, + u8 
pfc_en, u8 *prio_tc); +void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc); +u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw); +void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, + u8 *mode, u8 *prio_type); +void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc); +void i40e_dcb_hw_calculate_pool_sizes(struct i40e_hw *hw, + u8 num_ports, bool eee_enabled, + u8 pfc_en, u32 *mfs_tc, + struct i40e_rx_pb_config *pb_cfg); +void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw, + struct i40e_rx_pb_config *old_pb_cfg, + struct i40e_rx_pb_config *new_pb_cfg); i40e_status i40e_get_dcbx_status(struct i40e_hw *hw, - u16 *status); + u16 *status); i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib, - struct i40e_dcbx_config *dcbcfg); + struct i40e_dcbx_config *dcbcfg); i40e_status i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type, - u8 bridgetype, - struct i40e_dcbx_config *dcbcfg); + u8 bridgetype, + struct i40e_dcbx_config *dcbcfg); i40e_status i40e_get_dcb_config(struct i40e_hw *hw); -i40e_status i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change); +i40e_status i40e_init_dcb(struct i40e_hw *hw, + bool enable_mib_change); +enum i40e_status_code +i40e_get_fw_lldp_status(struct i40e_hw *hw, + enum i40e_get_fw_lldp_status_resp *lldp_status); +i40e_status i40e_set_dcb_config(struct i40e_hw *hw); +i40e_status i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen, + struct i40e_dcbx_config *dcbcfg); #endif /* _I40E_DCB_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 9deae9a35423..0345132a0ef5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -1,10 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #ifdef CONFIG_I40E_DCB #include "i40e.h" #include <net/dcbnl.h> +#define I40E_DCBNL_STATUS_SUCCESS 0 +#define I40E_DCBNL_STATUS_ERROR 1 +static bool i40e_dcbnl_find_app(struct i40e_dcbx_config *cfg, + struct i40e_dcb_app_priority_table *app); /** * i40e_get_pfc_delay - retrieve PFC Link Delay * @hw: pointer to hardware struct @@ -33,14 +37,13 @@ static int i40e_dcbnl_ieee_getets(struct net_device *dev, { struct i40e_pf *pf = i40e_netdev_to_pf(dev); struct i40e_dcbx_config *dcbxcfg; - struct i40e_hw *hw = &pf->hw; if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) return -EINVAL; - dcbxcfg = &hw->local_dcbx_config; + dcbxcfg = &pf->hw.local_dcbx_config; ets->willing = dcbxcfg->etscfg.willing; - ets->ets_cap = dcbxcfg->etscfg.maxtcs; + ets->ets_cap = I40E_MAX_TRAFFIC_CLASS; ets->cbs = dcbxcfg->etscfg.cbs; memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw)); @@ -84,7 +87,7 @@ static int i40e_dcbnl_ieee_getpfc(struct net_device *dev, pfc->mbc = dcbxcfg->pfc.mbc; i40e_get_pfc_delay(hw, &pfc->delay); - /* Get Requests/Indicatiosn */ + /* Get Requests/Indications */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { pfc->requests[i] = pf->stats.priority_xoff_tx[i]; pfc->indications[i] = pf->stats.priority_xoff_rx[i]; @@ -94,6 +97,713 @@ static int i40e_dcbnl_ieee_getpfc(struct net_device *dev, } /** + * i40e_dcbnl_ieee_setets - set IEEE ETS configuration + * @netdev: the corresponding netdev + * @ets: structure to hold the ETS information + * + * Set IEEE ETS configuration + **/ +static int i40e_dcbnl_ieee_setets(struct net_device *netdev, + struct ieee_ets *ets) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + struct i40e_dcbx_config *old_cfg; + int i, ret; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return -EINVAL; + + old_cfg = &pf->hw.local_dcbx_config; + /* Copy current config into temp */ + pf->tmp_cfg = *old_cfg; + + /* Update the ETS configuration for temp */ + pf->tmp_cfg.etscfg.willing = 
ets->willing; + pf->tmp_cfg.etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS; + pf->tmp_cfg.etscfg.cbs = ets->cbs; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + pf->tmp_cfg.etscfg.tcbwtable[i] = ets->tc_tx_bw[i]; + pf->tmp_cfg.etscfg.tsatable[i] = ets->tc_tsa[i]; + pf->tmp_cfg.etscfg.prioritytable[i] = ets->prio_tc[i]; + pf->tmp_cfg.etsrec.tcbwtable[i] = ets->tc_reco_bw[i]; + pf->tmp_cfg.etsrec.tsatable[i] = ets->tc_reco_tsa[i]; + pf->tmp_cfg.etsrec.prioritytable[i] = ets->reco_prio_tc[i]; + } + + /* Commit changes to HW */ + ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed setting DCB ETS configuration err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return -EINVAL; + } + + return 0; +} + +/** + * i40e_dcbnl_ieee_setpfc - set local IEEE PFC configuration + * @netdev: the corresponding netdev + * @pfc: structure to hold the PFC information + * + * Sets local IEEE PFC configuration + **/ +static int i40e_dcbnl_ieee_setpfc(struct net_device *netdev, + struct ieee_pfc *pfc) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + struct i40e_dcbx_config *old_cfg; + int ret; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return -EINVAL; + + old_cfg = &pf->hw.local_dcbx_config; + /* Copy current config into temp */ + pf->tmp_cfg = *old_cfg; + if (pfc->pfc_cap) + pf->tmp_cfg.pfc.pfccap = pfc->pfc_cap; + else + pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; + pf->tmp_cfg.pfc.pfcenable = pfc->pfc_en; + + ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed setting DCB PFC configuration err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return -EINVAL; + } + + return 0; +} + +/** + * i40e_dcbnl_ieee_setapp - set local IEEE App configuration + * @netdev: the corresponding netdev + * @app: structure to hold the Application information + 
* + * Sets local IEEE App configuration + **/ +static int i40e_dcbnl_ieee_setapp(struct net_device *netdev, + struct dcb_app *app) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + struct i40e_dcb_app_priority_table new_app; + struct i40e_dcbx_config *old_cfg; + int ret; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return -EINVAL; + + old_cfg = &pf->hw.local_dcbx_config; + if (old_cfg->numapps == I40E_DCBX_MAX_APPS) + return -EINVAL; + + ret = dcb_ieee_setapp(netdev, app); + if (ret) + return ret; + + new_app.selector = app->selector; + new_app.protocolid = app->protocol; + new_app.priority = app->priority; + /* Already internally available */ + if (i40e_dcbnl_find_app(old_cfg, &new_app)) + return 0; + + /* Copy current config into temp */ + pf->tmp_cfg = *old_cfg; + /* Add the app */ + pf->tmp_cfg.app[pf->tmp_cfg.numapps++] = new_app; + + ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed setting DCB configuration err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return -EINVAL; + } + + return 0; +} + +/** + * i40e_dcbnl_ieee_delapp - delete local IEEE App configuration + * @netdev: the corresponding netdev + * @app: structure to hold the Application information + * + * Deletes local IEEE App configuration other than the first application + * required by firmware + **/ +static int i40e_dcbnl_ieee_delapp(struct net_device *netdev, + struct dcb_app *app) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + struct i40e_dcbx_config *old_cfg; + int i, j, ret; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return -EINVAL; + + ret = dcb_ieee_delapp(netdev, app); + if (ret) + return ret; + + old_cfg = &pf->hw.local_dcbx_config; + /* Need one app for FW so keep it */ + if (old_cfg->numapps == 1) + return 0; + + /* Copy current config into temp */ + pf->tmp_cfg = 
*old_cfg; + + /* Find and reset the app */ + for (i = 1; i < pf->tmp_cfg.numapps; i++) { + if (app->selector == pf->tmp_cfg.app[i].selector && + app->protocol == pf->tmp_cfg.app[i].protocolid && + app->priority == pf->tmp_cfg.app[i].priority) { + /* Reset the app data */ + pf->tmp_cfg.app[i].selector = 0; + pf->tmp_cfg.app[i].protocolid = 0; + pf->tmp_cfg.app[i].priority = 0; + break; + } + } + + /* If the specific DCB app not found */ + if (i == pf->tmp_cfg.numapps) + return -EINVAL; + + pf->tmp_cfg.numapps--; + /* Overwrite the tmp_cfg app */ + for (j = i; j < pf->tmp_cfg.numapps; j++) + pf->tmp_cfg.app[j] = old_cfg->app[j + 1]; + + ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed setting DCB configuration err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return -EINVAL; + } + + return 0; +} + +/** + * i40e_dcbnl_getstate - Get DCB enabled state + * @netdev: the corresponding netdev + * + * Get the current DCB enabled state + **/ +static u8 i40e_dcbnl_getstate(struct net_device *netdev) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + dev_dbg(&pf->pdev->dev, "DCB state=%d\n", + !!(pf->flags & I40E_FLAG_DCB_ENABLED)); + return !!(pf->flags & I40E_FLAG_DCB_ENABLED); +} + +/** + * i40e_dcbnl_setstate - Set DCB state + * @netdev: the corresponding netdev + * @state: enable or disable + * + * Set the DCB state + **/ +static u8 i40e_dcbnl_setstate(struct net_device *netdev, u8 state) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + int ret = I40E_DCBNL_STATUS_SUCCESS; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return ret; + + dev_dbg(&pf->pdev->dev, "new state=%d current state=%d\n", + state, (pf->flags & I40E_FLAG_DCB_ENABLED) ? 
1 : 0); + /* Nothing to do */ + if (!state == !(pf->flags & I40E_FLAG_DCB_ENABLED)) + return ret; + + if (i40e_is_sw_dcb(pf)) { + if (state) { + pf->flags |= I40E_FLAG_DCB_ENABLED; + memcpy(&pf->hw.desired_dcbx_config, + &pf->hw.local_dcbx_config, + sizeof(struct i40e_dcbx_config)); + } else { + pf->flags &= ~I40E_FLAG_DCB_ENABLED; + } + } else { + /* Cannot directly manipulate FW LLDP Agent */ + ret = I40E_DCBNL_STATUS_ERROR; + } + return ret; +} + +/** + * i40e_dcbnl_set_pg_tc_cfg_tx - Set CEE PG Tx config + * @netdev: the corresponding netdev + * @tc: the corresponding traffic class + * @prio_type: the traffic priority type + * @bwg_id: the BW group id the traffic class belongs to + * @bw_pct: the BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding tc + * + * Set Tx PG settings for CEE mode + **/ +static void i40e_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 prio_type, u8 bwg_id, u8 bw_pct, + u8 up_map) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + int i; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + /* LLTC not supported yet */ + if (tc >= I40E_MAX_TRAFFIC_CLASS) + return; + + /* prio_type, bwg_id and bw_pct per UP are not supported */ + + /* Use only up_map to map tc */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (up_map & BIT(i)) + pf->tmp_cfg.etscfg.prioritytable[i] = tc; + } + pf->tmp_cfg.etscfg.tsatable[tc] = I40E_IEEE_TSA_ETS; + dev_dbg(&pf->pdev->dev, + "Set PG config tc=%d bwg_id=%d prio_type=%d bw_pct=%d up_map=%d\n", + tc, bwg_id, prio_type, bw_pct, up_map); +} + +/** + * i40e_dcbnl_set_pg_bwg_cfg_tx - Set CEE PG Tx BW config + * @netdev: the corresponding netdev + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the specified traffic class + * + * Set Tx BW settings for CEE mode + **/ +static void i40e_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, + u8 bw_pct) +{ + struct i40e_pf
*pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + /* LLTC not supported yet */ + if (pgid >= I40E_MAX_TRAFFIC_CLASS) + return; + + pf->tmp_cfg.etscfg.tcbwtable[pgid] = bw_pct; + dev_dbg(&pf->pdev->dev, "Set PG BW config tc=%d bw_pct=%d\n", + pgid, bw_pct); +} + +/** + * i40e_dcbnl_set_pg_tc_cfg_rx - Set CEE PG Rx config + * @netdev: the corresponding netdev + * @prio: the corresponding traffic class + * @prio_type: the traffic priority type + * @pgid: the BW group id the traffic class belongs to + * @bw_pct: the BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding tc + * + * Set Rx BW settings for CEE mode. The hardware does not support this + * so we won't allow setting of this parameter. + **/ +static void i40e_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, + int __always_unused prio, + u8 __always_unused prio_type, + u8 __always_unused pgid, + u8 __always_unused bw_pct, + u8 __always_unused up_map) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + dev_dbg(&pf->pdev->dev, "Rx TC PG Config Not Supported.\n"); +} + +/** + * i40e_dcbnl_set_pg_bwg_cfg_rx - Set CEE PG Rx config + * @netdev: the corresponding netdev + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the specified traffic class + * + * Set Rx BW settings for CEE mode. The hardware does not support this + * so we won't allow setting of this parameter. 
+ **/ +static void i40e_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int pgid, + u8 bw_pct) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + dev_dbg(&pf->pdev->dev, "Rx BWG PG Config Not Supported.\n"); +} + +/** + * i40e_dcbnl_get_pg_tc_cfg_tx - Get CEE PG Tx config + * @netdev: the corresponding netdev + * @prio: the corresponding user priority + * @prio_type: traffic priority type + * @pgid: the BW group ID the traffic class belongs to + * @bw_pct: BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + * + * Get Tx PG settings for CEE mode + **/ +static void i40e_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio, + u8 __always_unused *prio_type, + u8 *pgid, + u8 __always_unused *bw_pct, + u8 __always_unused *up_map) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + if (prio >= I40E_MAX_USER_PRIORITY) + return; + + *pgid = pf->hw.local_dcbx_config.etscfg.prioritytable[prio]; + dev_dbg(&pf->pdev->dev, "Get PG config prio=%d tc=%d\n", + prio, *pgid); +} + +/** + * i40e_dcbnl_get_pg_bwg_cfg_tx - Get CEE PG BW config + * @netdev: the corresponding netdev + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + * + * Get Tx BW settings for given TC in CEE mode + **/ +static void i40e_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, + u8 *bw_pct) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + if (pgid >= I40E_MAX_TRAFFIC_CLASS) + return; + + *bw_pct = pf->hw.local_dcbx_config.etscfg.tcbwtable[pgid]; + dev_dbg(&pf->pdev->dev, "Get PG BW config tc=%d bw_pct=%d\n", + pgid, *bw_pct); +} + +/** + * i40e_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config + * @netdev: the corresponding netdev + * @prio: the corresponding user priority + * 
@prio_type: the traffic priority type + * @pgid: the PG ID + * @bw_pct: the BW percentage for the corresponding BWG + * @up_map: prio mapped to corresponding TC + * + * Get Rx PG settings for CEE mode. The UP2TC map is applied in same + * manner for Tx and Rx (symmetrical) so return the TC information for + * given priority accordingly. + **/ +static void i40e_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio, + u8 *prio_type, u8 *pgid, u8 *bw_pct, + u8 *up_map) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + if (prio >= I40E_MAX_USER_PRIORITY) + return; + + *pgid = pf->hw.local_dcbx_config.etscfg.prioritytable[prio]; +} + +/** + * i40e_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config + * @netdev: the corresponding netdev + * @pgid: the corresponding traffic class + * @bw_pct: the BW percentage for the corresponding TC + * + * Get Rx BW settings for given TC in CEE mode + * The adapter doesn't support Rx ETS and runs in strict priority + * mode in Rx path and hence just return 0. 
+ **/ +static void i40e_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int pgid, + u8 *bw_pct) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + *bw_pct = 0; +} + +/** + * i40e_dcbnl_set_pfc_cfg - Set CEE PFC configuration + * @netdev: the corresponding netdev + * @prio: the corresponding user priority + * @setting: the PFC setting for given priority + * + * Set the PFC enabled/disabled setting for given user priority + **/ +static void i40e_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, + u8 setting) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + if (prio >= I40E_MAX_USER_PRIORITY) + return; + + pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; + if (setting) + pf->tmp_cfg.pfc.pfcenable |= BIT(prio); + else + pf->tmp_cfg.pfc.pfcenable &= ~BIT(prio); + dev_dbg(&pf->pdev->dev, + "Set PFC Config up=%d setting=%d pfcenable=0x%x\n", + prio, setting, pf->tmp_cfg.pfc.pfcenable); +} + +/** + * i40e_dcbnl_get_pfc_cfg - Get CEE PFC configuration + * @netdev: the corresponding netdev + * @prio: the corresponding user priority + * @setting: the PFC setting for given priority + * + * Get the PFC enabled/disabled setting for given user priority + **/ +static void i40e_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, + u8 *setting) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return; + + if (prio >= I40E_MAX_USER_PRIORITY) + return; + + *setting = (pf->hw.local_dcbx_config.pfc.pfcenable >> prio) & 0x1; + dev_dbg(&pf->pdev->dev, + "Get PFC Config up=%d setting=%d pfcenable=0x%x\n", + prio, *setting, pf->hw.local_dcbx_config.pfc.pfcenable); +} + +/** + * i40e_dcbnl_cee_set_all - Commit CEE DCB settings to hardware + * @netdev: the 
corresponding netdev + * + * Commit the current DCB configuration to hardware + **/ +static u8 i40e_dcbnl_cee_set_all(struct net_device *netdev) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + int err; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return I40E_DCBNL_STATUS_ERROR; + + dev_dbg(&pf->pdev->dev, "Commit DCB Configuration to the hardware\n"); + err = i40e_hw_dcb_config(pf, &pf->tmp_cfg); + + return err ? I40E_DCBNL_STATUS_ERROR : I40E_DCBNL_STATUS_SUCCESS; +} + +/** + * i40e_dcbnl_get_cap - Get DCBX capabilities of adapter + * @netdev: the corresponding netdev + * @capid: the capability type + * @cap: the capability value + * + * Return the capability value for a given capability type + **/ +static u8 i40e_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + return I40E_DCBNL_STATUS_ERROR; + + switch (capid) { + case DCB_CAP_ATTR_PG: + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_PG_TCS: + case DCB_CAP_ATTR_PFC_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_DCBX: + *cap = pf->dcbx_cap; + break; + case DCB_CAP_ATTR_UP2TC: + case DCB_CAP_ATTR_GSP: + case DCB_CAP_ATTR_BCN: + default: + *cap = false; + break; + } + + dev_dbg(&pf->pdev->dev, "Get Capability cap=%d capval=0x%x\n", + capid, *cap); + return I40E_DCBNL_STATUS_SUCCESS; +} + +/** + * i40e_dcbnl_getnumtcs - Get max number of traffic classes supported + * @netdev: the corresponding netdev + * @tcid: the TC id + * @num: total number of TCs supported by the device + * + * Return the total number of TCs supported by the adapter + **/ +static int i40e_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + return -EINVAL; + + *num = I40E_MAX_TRAFFIC_CLASS; + return 0; +} + +/** + * i40e_dcbnl_setnumtcs - Set CEE 
number of traffic classes + * @netdev: the corresponding netdev + * @tcid: the TC id + * @num: total number of TCs + * + * Set the total number of TCs (Unsupported) + **/ +static int i40e_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num) +{ + return -EINVAL; +} + +/** + * i40e_dcbnl_getpfcstate - Get CEE PFC mode + * @netdev: the corresponding netdev + * + * Get the current PFC enabled state + **/ +static u8 i40e_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + /* Return enabled if any PFC enabled UP */ + if (pf->hw.local_dcbx_config.pfc.pfcenable) + return 1; + else + return 0; +} + +/** + * i40e_dcbnl_setpfcstate - Set CEE PFC mode + * @netdev: the corresponding netdev + * @state: required state + * + * The PFC state to be set; this is enabled/disabled based on the PFC + * priority settings and not via this call for i40e driver + **/ +static void i40e_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + dev_dbg(&pf->pdev->dev, "PFC State is modified via PFC config.\n"); +} + +/** + * i40e_dcbnl_getapp - Get CEE APP + * @netdev: the corresponding netdev + * @idtype: the App selector + * @id: the App ethtype or port number + * + * Return the CEE mode app for the given idtype and id + **/ +static int i40e_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) || + (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)) + return -EINVAL; + + return dcb_getapp(netdev, &app); +} + +/** + * i40e_dcbnl_setdcbx - set required DCBx capability + * @netdev: the corresponding netdev + * @mode: new DCB mode managed or CEE+IEEE + * + * Set DCBx capability features + **/ +static u8 i40e_dcbnl_setdcbx(struct net_device *netdev, u8 mode) +{ + struct i40e_pf *pf = i40e_netdev_to_pf(netdev); + + /* Do 
not allow to set mode if managed by Firmware */ + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + return I40E_DCBNL_STATUS_ERROR; + + /* No support for LLD_MANAGED modes or CEE+IEEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) || + !(mode & DCB_CAP_DCBX_HOST)) + return I40E_DCBNL_STATUS_ERROR; + + /* Already set to the given mode no change */ + if (mode == pf->dcbx_cap) + return I40E_DCBNL_STATUS_SUCCESS; + + pf->dcbx_cap = mode; + if (mode & DCB_CAP_DCBX_VER_CEE) + pf->hw.local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE; + else + pf->hw.local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_IEEE; + + dev_dbg(&pf->pdev->dev, "mode=%d\n", mode); + return I40E_DCBNL_STATUS_SUCCESS; +} + +/** * i40e_dcbnl_getdcbx - retrieve current DCBx capability * @dev: the corresponding netdev * @@ -132,7 +842,31 @@ static const struct dcbnl_rtnl_ops dcbnl_ops = { .ieee_getets = i40e_dcbnl_ieee_getets, .ieee_getpfc = i40e_dcbnl_ieee_getpfc, .getdcbx = i40e_dcbnl_getdcbx, - .getpermhwaddr = i40e_dcbnl_get_perm_hw_addr, + .getpermhwaddr = i40e_dcbnl_get_perm_hw_addr, + .ieee_setets = i40e_dcbnl_ieee_setets, + .ieee_setpfc = i40e_dcbnl_ieee_setpfc, + .ieee_setapp = i40e_dcbnl_ieee_setapp, + .ieee_delapp = i40e_dcbnl_ieee_delapp, + .getstate = i40e_dcbnl_getstate, + .setstate = i40e_dcbnl_setstate, + .setpgtccfgtx = i40e_dcbnl_set_pg_tc_cfg_tx, + .setpgbwgcfgtx = i40e_dcbnl_set_pg_bwg_cfg_tx, + .setpgtccfgrx = i40e_dcbnl_set_pg_tc_cfg_rx, + .setpgbwgcfgrx = i40e_dcbnl_set_pg_bwg_cfg_rx, + .getpgtccfgtx = i40e_dcbnl_get_pg_tc_cfg_tx, + .getpgbwgcfgtx = i40e_dcbnl_get_pg_bwg_cfg_tx, + .getpgtccfgrx = i40e_dcbnl_get_pg_tc_cfg_rx, + .getpgbwgcfgrx = i40e_dcbnl_get_pg_bwg_cfg_rx, + .setpfccfg = i40e_dcbnl_set_pfc_cfg, + .getpfccfg = i40e_dcbnl_get_pfc_cfg, + .setall = i40e_dcbnl_cee_set_all, + .getcap = i40e_dcbnl_get_cap, + .getnumtcs = i40e_dcbnl_getnumtcs, + .setnumtcs = i40e_dcbnl_setnumtcs, + .getpfcstate = 
i40e_dcbnl_getpfcstate, + .setpfcstate = i40e_dcbnl_setpfcstate, + .getapp = i40e_dcbnl_getapp, + .setdcbx = i40e_dcbnl_setdcbx, }; /** @@ -152,12 +886,16 @@ void i40e_dcbnl_set_all(struct i40e_vsi *vsi) u8 prio, tc_map; int i; + /* SW DCB taken care by DCBNL set calls */ + if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) + return; + /* DCB not enabled */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return; /* MFP mode but not an iSCSI PF so return */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi)) + if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(hw->func_caps.iscsi)) return; dcbxcfg = &hw->local_dcbx_config; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 26ba1f3eb2d8..8a4dd77a12da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -3222,13 +3222,30 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, fsp->m_u.usr_ip4_spec.proto = 0; } - /* Reverse the src and dest notion, since the HW views them from - * Tx perspective where as the user expects it from Rx filter view. - */ - fsp->h_u.tcp_ip4_spec.psrc = rule->dst_port; - fsp->h_u.tcp_ip4_spec.pdst = rule->src_port; - fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip; - fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip; + if (fsp->flow_type == IPV6_USER_FLOW || + fsp->flow_type == UDP_V6_FLOW || + fsp->flow_type == TCP_V6_FLOW || + fsp->flow_type == SCTP_V6_FLOW) { + /* Reverse the src and dest notion, since the HW views them + * from Tx perspective where as the user expects it from + * Rx filter view. 
+ */ + fsp->h_u.tcp_ip6_spec.psrc = rule->dst_port; + fsp->h_u.tcp_ip6_spec.pdst = rule->src_port; + memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->src_ip6, + sizeof(__be32) * 4); + memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->dst_ip6, + sizeof(__be32) * 4); + } else { + /* Reverse the src and dest notion, since the HW views them + * from Tx perspective where as the user expects it from + * Rx filter view. + */ + fsp->h_u.tcp_ip4_spec.psrc = rule->dst_port; + fsp->h_u.tcp_ip4_spec.pdst = rule->src_port; + fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip; + fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip; + } switch (rule->flow_type) { case SCTP_V4_FLOW: @@ -3240,9 +3257,21 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, case UDP_V4_FLOW: index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; break; + case SCTP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_SCTP; + break; + case TCP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + break; + case UDP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + break; case IP_USER_FLOW: index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; break; + case IPV6_USER_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER; + break; default: /* If we have stored a filter with a flow type not listed here * it is almost certainly a driver bug. 
WARN(), and then @@ -3258,6 +3287,20 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf, input_set = i40e_read_fd_input_set(pf, index); no_input_set: + if (input_set & I40E_L3_V6_SRC_MASK) { + fsp->m_u.tcp_ip6_spec.ip6src[0] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6src[1] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6src[2] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6src[3] = htonl(0xFFFFFFFF); + } + + if (input_set & I40E_L3_V6_DST_MASK) { + fsp->m_u.tcp_ip6_spec.ip6dst[0] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6dst[1] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6dst[2] = htonl(0xFFFFFFFF); + fsp->m_u.tcp_ip6_spec.ip6dst[3] = htonl(0xFFFFFFFF); + } + if (input_set & I40E_L3_SRC_MASK) fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFFFFFF); @@ -3275,6 +3318,14 @@ no_input_set: else fsp->ring_cookie = rule->q_index; + if (rule->vlan_tag) { + fsp->h_ext.vlan_etype = rule->vlan_etype; + fsp->m_ext.vlan_etype = htons(0xFFFF); + fsp->h_ext.vlan_tci = rule->vlan_tag; + fsp->m_ext.vlan_tci = htons(0xFFFF); + fsp->flow_type |= FLOW_EXT; + } + if (rule->dest_vsi != pf->vsi[pf->lan_vsi]->id) { struct i40e_vsi *vsi; @@ -3921,6 +3972,14 @@ static const char *i40e_flow_str(struct ethtool_rx_flow_spec *fsp) return "sctp4"; case IP_USER_FLOW: return "ip4"; + case TCP_V6_FLOW: + return "tcp6"; + case UDP_V6_FLOW: + return "udp6"; + case SCTP_V6_FLOW: + return "sctp6"; + case IPV6_USER_FLOW: + return "ip6"; default: return "unknown"; } @@ -4056,9 +4115,14 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, struct ethtool_rx_flow_spec *fsp, struct i40e_rx_flow_userdef *userdef) { - struct i40e_pf *pf = vsi->back; + static const __be32 ipv6_full_mask[4] = {cpu_to_be32(0xffffffff), + cpu_to_be32(0xffffffff), cpu_to_be32(0xffffffff), + cpu_to_be32(0xffffffff)}; + struct ethtool_tcpip6_spec *tcp_ip6_spec; + struct ethtool_usrip6_spec *usr_ip6_spec; struct ethtool_tcpip4_spec *tcp_ip4_spec; struct ethtool_usrip4_spec *usr_ip4_spec; + struct i40e_pf 
*pf = vsi->back; u64 current_mask, new_mask; bool new_flex_offset = false; bool flex_l3 = false; @@ -4080,11 +4144,28 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; fdir_filter_count = &pf->fd_udp4_filter_cnt; break; + case SCTP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_SCTP; + fdir_filter_count = &pf->fd_sctp6_filter_cnt; + break; + case TCP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + fdir_filter_count = &pf->fd_tcp6_filter_cnt; + break; + case UDP_V6_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + fdir_filter_count = &pf->fd_udp6_filter_cnt; + break; case IP_USER_FLOW: index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; fdir_filter_count = &pf->fd_ip4_filter_cnt; flex_l3 = true; break; + case IPV6_USER_FLOW: + index = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER; + fdir_filter_count = &pf->fd_ip6_filter_cnt; + flex_l3 = true; + break; default: return -EOPNOTSUPP; } @@ -4147,6 +4228,53 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EOPNOTSUPP; break; + case SCTP_V6_FLOW: + new_mask &= ~I40E_VERIFY_TAG_MASK; + fallthrough; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + tcp_ip6_spec = &fsp->m_u.tcp_ip6_spec; + + /* Check if user provided IPv6 source address. */ + if (ipv6_addr_equal((struct in6_addr *)&tcp_ip6_spec->ip6src, + (struct in6_addr *)&ipv6_full_mask)) + new_mask |= I40E_L3_V6_SRC_MASK; + else if (ipv6_addr_any((struct in6_addr *) + &tcp_ip6_spec->ip6src)) + new_mask &= ~I40E_L3_V6_SRC_MASK; + else + return -EOPNOTSUPP; + + /* Check if user provided destination address. 
*/ + if (ipv6_addr_equal((struct in6_addr *)&tcp_ip6_spec->ip6dst, + (struct in6_addr *)&ipv6_full_mask)) + new_mask |= I40E_L3_V6_DST_MASK; + else if (ipv6_addr_any((struct in6_addr *) + &tcp_ip6_spec->ip6src)) + new_mask &= ~I40E_L3_V6_DST_MASK; + else + return -EOPNOTSUPP; + + /* L4 source port */ + if (tcp_ip6_spec->psrc == htons(0xFFFF)) + new_mask |= I40E_L4_SRC_MASK; + else if (!tcp_ip6_spec->psrc) + new_mask &= ~I40E_L4_SRC_MASK; + else + return -EOPNOTSUPP; + + /* L4 destination port */ + if (tcp_ip6_spec->pdst == htons(0xFFFF)) + new_mask |= I40E_L4_DST_MASK; + else if (!tcp_ip6_spec->pdst) + new_mask &= ~I40E_L4_DST_MASK; + else + return -EOPNOTSUPP; + + /* Filtering on Traffic Classes is not supported. */ + if (tcp_ip6_spec->tclass) + return -EOPNOTSUPP; + break; case IP_USER_FLOW: usr_ip4_spec = &fsp->m_u.usr_ip4_spec; @@ -4187,10 +4315,62 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EINVAL; break; + case IPV6_USER_FLOW: + usr_ip6_spec = &fsp->m_u.usr_ip6_spec; + + /* Check if user provided IPv6 source address. */ + if (ipv6_addr_equal((struct in6_addr *)&usr_ip6_spec->ip6src, + (struct in6_addr *)&ipv6_full_mask)) + new_mask |= I40E_L3_V6_SRC_MASK; + else if (ipv6_addr_any((struct in6_addr *) + &usr_ip6_spec->ip6src)) + new_mask &= ~I40E_L3_V6_SRC_MASK; + else + return -EOPNOTSUPP; + + /* Check if user provided destination address. */ + if (ipv6_addr_equal((struct in6_addr *)&usr_ip6_spec->ip6dst, + (struct in6_addr *)&ipv6_full_mask)) + new_mask |= I40E_L3_V6_DST_MASK; + else if (ipv6_addr_any((struct in6_addr *) + &usr_ip6_spec->ip6src)) + new_mask &= ~I40E_L3_V6_DST_MASK; + else + return -EOPNOTSUPP; + + if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF)) + new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; + else if (!usr_ip6_spec->l4_4_bytes) + new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + else + return -EOPNOTSUPP; + + /* Filtering on Traffic class is not supported. 
*/ + if (usr_ip6_spec->tclass) + return -EOPNOTSUPP; + + /* Filtering on L4 protocol is not supported */ + if (usr_ip6_spec->l4_proto) + return -EINVAL; + + break; default: return -EOPNOTSUPP; } + if (fsp->flow_type & FLOW_EXT) { + /* Allow only 802.1Q and no etype defined, as + * later it's modified to 0x8100 + */ + if (fsp->h_ext.vlan_etype != htons(ETH_P_8021Q) && + fsp->h_ext.vlan_etype != 0) + return -EOPNOTSUPP; + if (fsp->m_ext.vlan_tci == htons(0xFFFF)) + new_mask |= I40E_VLAN_SRC_MASK; + else + new_mask &= ~I40E_VLAN_SRC_MASK; + } + /* First, clear all flexible filter entries */ new_mask &= ~I40E_FLEX_INPUT_MASK; @@ -4370,7 +4550,9 @@ static bool i40e_match_fdir_filter(struct i40e_fdir_filter *a, a->dst_port != b->dst_port || a->src_port != b->src_port || a->flow_type != b->flow_type || - a->ip4_proto != b->ip4_proto) + a->ipl4_proto != b->ipl4_proto || + a->vlan_tag != b->vlan_tag || + a->vlan_etype != b->vlan_etype) return false; return true; @@ -4528,15 +4710,38 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src; input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst; input->flow_type = fsp->flow_type & ~FLOW_EXT; - input->ip4_proto = fsp->h_u.usr_ip4_spec.proto; - /* Reverse the src and dest notion, since the HW expects them to be from - * Tx perspective where as the input from user is from Rx filter view. 
- */ - input->dst_port = fsp->h_u.tcp_ip4_spec.psrc; - input->src_port = fsp->h_u.tcp_ip4_spec.pdst; - input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src; - input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst; + input->vlan_etype = fsp->h_ext.vlan_etype; + if (!fsp->m_ext.vlan_etype && fsp->h_ext.vlan_tci) + input->vlan_etype = cpu_to_be16(ETH_P_8021Q); + if (fsp->m_ext.vlan_tci && input->vlan_etype) + input->vlan_tag = fsp->h_ext.vlan_tci; + if (input->flow_type == IPV6_USER_FLOW || + input->flow_type == UDP_V6_FLOW || + input->flow_type == TCP_V6_FLOW || + input->flow_type == SCTP_V6_FLOW) { + /* Reverse the src and dest notion, since the HW expects them + * to be from Tx perspective where as the input from user is + * from Rx filter view. + */ + input->ipl4_proto = fsp->h_u.usr_ip6_spec.l4_proto; + input->dst_port = fsp->h_u.tcp_ip6_spec.psrc; + input->src_port = fsp->h_u.tcp_ip6_spec.pdst; + memcpy(input->dst_ip6, fsp->h_u.ah_ip6_spec.ip6src, + sizeof(__be32) * 4); + memcpy(input->src_ip6, fsp->h_u.ah_ip6_spec.ip6dst, + sizeof(__be32) * 4); + } else { + /* Reverse the src and dest notion, since the HW expects them + * to be from Tx perspective where as the input from user is + * from Rx filter view. 
+ */ + input->ipl4_proto = fsp->h_u.usr_ip4_spec.proto; + input->dst_port = fsp->h_u.tcp_ip4_spec.psrc; + input->src_port = fsp->h_u.tcp_ip4_spec.pdst; + input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src; + input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst; + } if (userdef.flex_filter) { input->flex_filter = true; @@ -5033,23 +5238,13 @@ flags_complete: if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { - struct i40e_dcbx_config *dcbcfg; - +#ifdef CONFIG_I40E_DCB + i40e_dcb_sw_default_config(pf); +#endif /* CONFIG_I40E_DCB */ + i40e_aq_cfg_lldp_mib_change_event(&pf->hw, false, NULL); i40e_aq_stop_lldp(&pf->hw, true, false, NULL); - i40e_aq_set_dcb_parameters(&pf->hw, true, NULL); - /* reset local_dcbx_config to default */ - dcbcfg = &pf->hw.local_dcbx_config; - dcbcfg->etscfg.willing = 1; - dcbcfg->etscfg.maxtcs = 0; - dcbcfg->etscfg.tcbwtable[0] = 100; - for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) - dcbcfg->etscfg.tcbwtable[i] = 0; - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) - dcbcfg->etscfg.prioritytable[i] = 0; - dcbcfg->etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; - dcbcfg->pfc.willing = 1; - dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; } else { + i40e_set_lldp_forwarding(pf, false); status = i40e_aq_start_lldp(&pf->hw, false, NULL); if (status) { adq_err = pf->hw.aq.asq_last_status; @@ -5252,12 +5447,131 @@ static int i40e_get_module_eeprom(struct net_device *netdev, static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) { - return -EOPNOTSUPP; + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_aq_get_phy_abilities_resp phy_cfg; + enum i40e_status_code status = 0; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + /* Get initial PHY capabilities */ + status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_cfg, NULL); + if (status) + return -EAGAIN; + + /* Check whether NIC configuration is compatible with Energy Efficient + * Ethernet 
(EEE) mode. + */ + if (phy_cfg.eee_capability == 0) + return -EOPNOTSUPP; + + edata->supported = SUPPORTED_Autoneg; + edata->lp_advertised = edata->supported; + + /* Get current configuration */ + status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_cfg, NULL); + if (status) + return -EAGAIN; + + edata->advertised = phy_cfg.eee_capability ? SUPPORTED_Autoneg : 0U; + edata->eee_enabled = !!edata->advertised; + edata->tx_lpi_enabled = pf->stats.tx_lpi_status; + + edata->eee_active = pf->stats.tx_lpi_status && pf->stats.rx_lpi_status; + + return 0; +} + +static int i40e_is_eee_param_supported(struct net_device *netdev, + struct ethtool_eee *edata) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_ethtool_not_used { + u32 value; + const char *name; + } param[] = { + {edata->advertised & ~SUPPORTED_Autoneg, "advertise"}, + {edata->tx_lpi_timer, "tx-timer"}, + {edata->tx_lpi_enabled != pf->stats.tx_lpi_status, "tx-lpi"} + }; + int i; + + for (i = 0; i < ARRAY_SIZE(param); i++) { + if (param[i].value) { + netdev_info(netdev, + "EEE setting %s not supported\n", + param[i].name); + return -EOPNOTSUPP; + } + } + + return 0; } static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) { - return -EOPNOTSUPP; + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_aq_get_phy_abilities_resp abilities; + enum i40e_status_code status = I40E_SUCCESS; + struct i40e_aq_set_phy_config config; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + __le16 eee_capability; + + /* Deny parameters we don't support */ + if (i40e_is_eee_param_supported(netdev, edata)) + return -EOPNOTSUPP; + + /* Get initial PHY capabilities */ + status = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, + NULL); + if (status) + return -EAGAIN; + + /* Check whether NIC configuration is compatible with Energy Efficient + * Ethernet 
(EEE) mode. + */ + if (abilities.eee_capability == 0) + return -EOPNOTSUPP; + + /* Cache initial EEE capability */ + eee_capability = abilities.eee_capability; + + /* Get current PHY configuration */ + status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (status) + return -EAGAIN; + + /* Cache current PHY configuration */ + config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; + config.link_speed = abilities.link_speed; + config.abilities = abilities.abilities | + I40E_AQ_PHY_ENABLE_ATOMIC_LINK; + config.eeer = abilities.eeer_val; + config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; + + /* Set desired EEE state */ + if (edata->eee_enabled) { + config.eee_capability = eee_capability; + config.eeer |= cpu_to_le32(I40E_PRTPM_EEER_TX_LPI_EN_MASK); + } else { + config.eee_capability = 0; + config.eeer &= cpu_to_le32(~I40E_PRTPM_EEER_TX_LPI_EN_MASK); + } + + /* Apply modified PHY configuration */ + status = i40e_aq_set_phy_config(hw, &config, NULL); + if (status) + return -EAGAIN; + + return 0; } static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 521ea9df38d5..63e19d2e3301 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #include <linux/etherdevice.h> #include <linux/of_net.h> @@ -35,7 +35,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static void i40e_prep_for_reset(struct i40e_pf *pf); static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); @@ -3496,6 +3496,24 @@ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi) } /** + * i40e_reset_fdir_filter_cnt - Reset flow director filter counters + * @pf: Pointer to the targeted PF + * + * Set all flow director counters to 0. + */ +static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf) +{ + pf->fd_tcp4_filter_cnt = 0; + pf->fd_udp4_filter_cnt = 0; + pf->fd_sctp4_filter_cnt = 0; + pf->fd_ip4_filter_cnt = 0; + pf->fd_tcp6_filter_cnt = 0; + pf->fd_udp6_filter_cnt = 0; + pf->fd_sctp6_filter_cnt = 0; + pf->fd_ip6_filter_cnt = 0; +} + +/** * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters * @vsi: Pointer to the targeted VSI * @@ -3512,10 +3530,7 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi) return; /* Reset FDir counters as we're replaying all existing filters */ - pf->fd_tcp4_filter_cnt = 0; - pf->fd_udp4_filter_cnt = 0; - pf->fd_sctp4_filter_cnt = 0; - pf->fd_ip4_filter_cnt = 0; + i40e_reset_fdir_filter_cnt(pf); hlist_for_each_entry_safe(filter, node, &pf->fdir_filter_list, fdir_node) { @@ -5291,6 +5306,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, vsi->seid); return ret; } + memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; @@ -5943,6 +5959,7 @@ static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct 
i40e_channel *ch, i40e_status ret; int i; + memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = ch->enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; @@ -6398,6 +6415,9 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf) /* Enable the TCs available on PF to all VEBs */ tc_map = i40e_pf_get_tc_map(pf); + if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS) + return; + for (v = 0; v < I40E_MAX_VEB; v++) { if (!pf->veb[v]) continue; @@ -6465,6 +6485,316 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) } /** + * i40e_suspend_port_tx - Suspend port Tx + * @pf: PF struct + * + * Suspend a port's Tx and issue a PF reset in case of failure. + **/ +static int i40e_suspend_port_tx(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + int ret; + + ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Suspend Port Tx failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* Schedule PF reset to recover */ + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); + i40e_service_event_schedule(pf); + } + + return ret; +} + +/** + * i40e_hw_set_dcb_config - Program new DCBX settings into HW + * @pf: PF being configured + * @new_cfg: New DCBX configuration + * + * Program DCB settings into HW and reconfigure VEB/VSIs on + * given PF. Uses "Set LLDP MIB" AQC to program the hardware. 
+ **/ +static int i40e_hw_set_dcb_config(struct i40e_pf *pf, + struct i40e_dcbx_config *new_cfg) +{ + struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config; + int ret; + + /* Check if need reconfiguration */ + if (!memcmp(&new_cfg, &old_cfg, sizeof(new_cfg))) { + dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n"); + return 0; + } + + /* Config change disable all VSIs */ + i40e_pf_quiesce_all_vsi(pf); + + /* Copy the new config to the current config */ + *old_cfg = *new_cfg; + old_cfg->etsrec = old_cfg->etscfg; + ret = i40e_set_dcb_config(&pf->hw); + if (ret) { + dev_info(&pf->pdev->dev, + "Set DCB Config failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto out; + } + + /* Changes in configuration update VEB/VSI */ + i40e_dcb_reconfigure(pf); +out: + /* In case of reset do not try to resume anything */ + if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Re-start the VSIs if disabled */ + ret = i40e_resume_port_tx(pf); + /* In case of error no point in resuming VSIs */ + if (ret) + goto err; + i40e_pf_unquiesce_all_vsi(pf); + } +err: + return ret; +} + +/** + * i40e_hw_dcb_config - Program new DCBX settings into HW + * @pf: PF being configured + * @new_cfg: New DCBX configuration + * + * Program DCB settings into HW and reconfigure VEB/VSIs on + * given PF + **/ +int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) +{ + struct i40e_aqc_configure_switching_comp_ets_data ets_data; + u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0}; + u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS]; + struct i40e_dcbx_config *old_cfg; + u8 mode[I40E_MAX_TRAFFIC_CLASS]; + struct i40e_rx_pb_config pb_cfg; + struct i40e_hw *hw = &pf->hw; + u8 num_ports = hw->num_ports; + bool need_reconfig; + int ret = -EINVAL; + u8 lltc_map = 0; + u8 tc_map = 0; + u8 new_numtc; + u8 i; + + dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n"); + /* Un-pack information to Program ETS HW via 
shared API + * numtc, tcmap + * LLTC map + * ETS/NON-ETS arbiter mode + * max exponent (credit refills) + * Total number of ports + * PFC priority bit-map + * Priority Table + * BW % per TC + * Arbiter mode between UPs sharing same TC + * TSA table (ETS or non-ETS) + * EEE enabled or not + * MFS TC table + */ + + new_numtc = i40e_dcb_get_num_tc(new_cfg); + + memset(&ets_data, 0, sizeof(ets_data)); + for (i = 0; i < new_numtc; i++) { + tc_map |= BIT(i); + switch (new_cfg->etscfg.tsatable[i]) { + case I40E_IEEE_TSA_ETS: + prio_type[i] = I40E_DCB_PRIO_TYPE_ETS; + ets_data.tc_bw_share_credits[i] = + new_cfg->etscfg.tcbwtable[i]; + break; + case I40E_IEEE_TSA_STRICT: + prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT; + lltc_map |= BIT(i); + ets_data.tc_bw_share_credits[i] = + I40E_DCB_STRICT_PRIO_CREDITS; + break; + default: + /* Invalid TSA type */ + need_reconfig = false; + goto out; + } + } + + old_cfg = &hw->local_dcbx_config; + /* Check if need reconfiguration */ + need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg); + + /* If needed, enable/disable frame tagging, disable all VSIs + * and suspend port tx + */ + if (need_reconfig) { + /* Enable DCB tagging only when more than one TC */ + if (new_numtc > 1) + pf->flags |= I40E_FLAG_DCB_ENABLED; + else + pf->flags &= ~I40E_FLAG_DCB_ENABLED; + + set_bit(__I40E_PORT_SUSPENDED, pf->state); + /* Reconfiguration needed quiesce all VSIs */ + i40e_pf_quiesce_all_vsi(pf); + ret = i40e_suspend_port_tx(pf); + if (ret) + goto err; + } + + /* Configure Port ETS Tx Scheduler */ + ets_data.tc_valid_bits = tc_map; + ets_data.tc_strict_priority_flags = lltc_map; + ret = i40e_aq_config_switch_comp_ets + (hw, pf->mac_seid, &ets_data, + i40e_aqc_opc_modify_switching_comp_ets, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Modify Port ETS failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto out; + } + + /* Configure Rx ETS HW */ + memset(&mode, 
I40E_DCB_ARB_MODE_ROUND_ROBIN, sizeof(mode)); + i40e_dcb_hw_set_num_tc(hw, new_numtc); + i40e_dcb_hw_rx_fifo_config(hw, I40E_DCB_ARB_MODE_ROUND_ROBIN, + I40E_DCB_ARB_MODE_STRICT_PRIORITY, + I40E_DCB_DEFAULT_MAX_EXPONENT, + lltc_map); + i40e_dcb_hw_rx_cmd_monitor_config(hw, new_numtc, num_ports); + i40e_dcb_hw_rx_ets_bw_config(hw, new_cfg->etscfg.tcbwtable, mode, + prio_type); + i40e_dcb_hw_pfc_config(hw, new_cfg->pfc.pfcenable, + new_cfg->etscfg.prioritytable); + i40e_dcb_hw_rx_up2tc_config(hw, new_cfg->etscfg.prioritytable); + + /* Configure Rx Packet Buffers in HW */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + mfs_tc[i] = pf->vsi[pf->lan_vsi]->netdev->mtu; + mfs_tc[i] += I40E_PACKET_HDR_PAD; + } + + i40e_dcb_hw_calculate_pool_sizes(hw, num_ports, + false, new_cfg->pfc.pfcenable, + mfs_tc, &pb_cfg); + i40e_dcb_hw_rx_pb_config(hw, &pf->pb_cfg, &pb_cfg); + + /* Update the local Rx Packet buffer config */ + pf->pb_cfg = pb_cfg; + + /* Inform the FW about changes to DCB configuration */ + ret = i40e_aq_dcb_updated(&pf->hw, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "DCB Updated failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto out; + } + + /* Update the port DCBx configuration */ + *old_cfg = *new_cfg; + + /* Changes in configuration update VEB/VSI */ + i40e_dcb_reconfigure(pf); +out: + /* Re-start the VSIs if disabled */ + if (need_reconfig) { + ret = i40e_resume_port_tx(pf); + + clear_bit(__I40E_PORT_SUSPENDED, pf->state); + /* In case of error no point in resuming VSIs */ + if (ret) + goto err; + + /* Wait for the PF's queues to be disabled */ + ret = i40e_pf_wait_queues_disabled(pf); + if (ret) { + /* Schedule PF reset to recover */ + set_bit(__I40E_PF_RESET_REQUESTED, pf->state); + i40e_service_event_schedule(pf); + goto err; + } else { + i40e_pf_unquiesce_all_vsi(pf); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); + } + /* 
registers are set, lets apply */ + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + ret = i40e_hw_set_dcb_config(pf, new_cfg); + } + +err: + return ret; +} + +/** + * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW + * @pf: PF being queried + * + * Set default DCB configuration in case DCB is to be done in SW. + **/ +int i40e_dcb_sw_default_config(struct i40e_pf *pf) +{ + struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config; + struct i40e_aqc_configure_switching_comp_ets_data ets_data; + struct i40e_hw *hw = &pf->hw; + int err; + + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) { + /* Update the local cached instance with TC0 ETS */ + memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config)); + pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; + pf->tmp_cfg.etscfg.maxtcs = 0; + pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; + pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; + pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING; + pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; + /* FW needs one App to configure HW */ + pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS; + pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE; + pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO; + pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE; + + return i40e_hw_set_dcb_config(pf, &pf->tmp_cfg); + } + + memset(&ets_data, 0, sizeof(ets_data)); + ets_data.tc_valid_bits = I40E_DEFAULT_TRAFFIC_CLASS; /* TC0 only */ + ets_data.tc_strict_priority_flags = 0; /* ETS */ + ets_data.tc_bw_share_credits[0] = I40E_IEEE_DEFAULT_ETS_TCBW; /* 100% to TC0 */ + + /* Enable ETS on the Physical port */ + err = i40e_aq_config_switch_comp_ets + (hw, pf->mac_seid, &ets_data, + i40e_aqc_opc_enable_switching_comp_ets, NULL); + if (err) { + dev_info(&pf->pdev->dev, + "Enable Port ETS failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, err), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + err = -ENOENT; + goto out; + } + + 
/* Update the local cached instance with TC0 ETS */ + dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; + dcb_cfg->etscfg.cbs = 0; + dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS; + dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW; + +out: + return err; +} + +/** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * @@ -6474,18 +6804,31 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) static int i40e_init_pf_dcb(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - int err = 0; + int err; /* Do not enable DCB for SW1 and SW2 images even if the FW is capable * Also do not enable DCBx if FW LLDP agent is disabled */ - if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) || - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) { - dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n"); + if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) { + dev_info(&pf->pdev->dev, "DCB is not supported.\n"); err = I40E_NOT_SUPPORTED; goto out; } - + if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { + dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n"); + err = i40e_dcb_sw_default_config(pf); + if (err) { + dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n"); + goto out; + } + dev_info(&pf->pdev->dev, "SW DCB initialization succeeded.\n"); + pf->dcbx_cap = DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + /* at init capable but disabled */ + pf->flags |= I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_ENABLED; + goto out; + } err = i40e_init_dcb(hw, true); if (!err) { /* Device/Function is not DCBX capable */ @@ -6525,6 +6868,40 @@ out: #endif /* CONFIG_I40E_DCB */ /** + * i40e_set_lldp_forwarding - set forwarding of lldp frames + * @pf: PF being configured + * @enable: if forwarding to OS shall be enabled + * + * Toggle forwarding of lldp frames behavior, + * When passing DCB control from firmware to software + * lldp frames must be forwarded to the software based + * lldp agent. 
+ */ +void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable) +{ + if (pf->lan_vsi == I40E_NO_VSI) + return; + + if (!pf->vsi[pf->lan_vsi]) + return; + + /* No need to check the outcome, commands may fail + * if desired value is already set + */ + i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP, + I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX | + I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC, + pf->vsi[pf->lan_vsi]->seid, 0, + enable, NULL, NULL); + + i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP, + I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX | + I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC, + pf->vsi[pf->lan_vsi]->seid, 0, + enable, NULL, NULL); +} + +/** * i40e_print_link_message - print link up or down * @vsi: the VSI for which link needs a message * @isup: true of link is up, false otherwise @@ -8287,7 +8664,6 @@ int i40e_open(struct net_device *netdev) TCP_FLAG_FIN | TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); - udp_tunnel_get_rx_info(netdev); return 0; @@ -8402,32 +8778,51 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf) INIT_LIST_HEAD(&pf->l4_flex_pit_list); pf->fdir_pf_active_filters = 0; - pf->fd_tcp4_filter_cnt = 0; - pf->fd_udp4_filter_cnt = 0; - pf->fd_sctp4_filter_cnt = 0; - pf->fd_ip4_filter_cnt = 0; + i40e_reset_fdir_filter_cnt(pf); /* Reprogram the default input set for TCP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for TCP/IPv6 */ + i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_TCP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for UDP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for UDP/IPv6 */ + 
i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_UDP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for SCTP/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP, I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for SCTP/IPv6 */ + i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP, + I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK | + I40E_L4_SRC_MASK | I40E_L4_DST_MASK); + /* Reprogram the default input set for Other/IPv4 */ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4, I40E_L3_SRC_MASK | I40E_L3_DST_MASK); + + /* Reprogram the default input set for Other/IPv6 */ + i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK); + + i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV6, + I40E_L3_SRC_MASK | I40E_L3_DST_MASK); } /** @@ -8543,7 +8938,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) * * Resets PF and reinitializes PFs VSI. 
*/ - i40e_prep_for_reset(pf, lock_acquired); + i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, true, lock_acquired); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { @@ -8653,6 +9048,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, int ret = 0; u8 type; + /* X710-T*L 2.5G and 5G speeds don't support DCB */ + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) && + !(pf->flags & I40E_FLAG_DCB_CAPABLE)) + /* let firmware decide if the DCB should be disabled */ + pf->flags |= I40E_FLAG_DCB_CAPABLE; + /* Not DCB capable or capability disabled */ if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) return ret; @@ -8684,10 +9087,20 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Get updated DCBX data from firmware */ ret = i40e_get_dcb_config(&pf->hw); if (ret) { - dev_info(&pf->pdev->dev, - "Failed querying DCB configuration data from firmware, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* X710-T*L 2.5G and 5G speeds don't support DCB */ + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { + dev_warn(&pf->pdev->dev, + "DCB is not supported for X710-T*L 2.5/5G speeds\n"); + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + } else { + dev_info(&pf->pdev->dev, + "Failed querying DCB configuration data from firmware, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + } goto exit; } @@ -8891,8 +9304,17 @@ static void i40e_delete_invalid_filter(struct i40e_pf *pf, case SCTP_V4_FLOW: pf->fd_sctp4_filter_cnt--; break; + case TCP_V6_FLOW: + pf->fd_tcp6_filter_cnt--; + break; + case UDP_V6_FLOW: + pf->fd_udp6_filter_cnt--; + break; + case SCTP_V6_FLOW: + pf->fd_udp6_filter_cnt--; + break; case IP_USER_FLOW: - switch (filter->ip4_proto) { + switch (filter->ipl4_proto) { case IPPROTO_TCP: 
pf->fd_tcp4_filter_cnt--; break; @@ -8907,6 +9329,22 @@ static void i40e_delete_invalid_filter(struct i40e_pf *pf, break; } break; + case IPV6_USER_FLOW: + switch (filter->ipl4_proto) { + case IPPROTO_TCP: + pf->fd_tcp6_filter_cnt--; + break; + case IPPROTO_UDP: + pf->fd_udp6_filter_cnt--; + break; + case IPPROTO_SCTP: + pf->fd_sctp6_filter_cnt--; + break; + case IPPROTO_IP: + pf->fd_ip6_filter_cnt--; + break; + } + break; } /* Remove the filter from the list and free memory */ @@ -8940,7 +9378,7 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) * rules active. */ if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) && - (pf->fd_tcp4_filter_cnt == 0)) + pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0) i40e_reenable_fdir_atr(pf); /* if hw had a problem adding a filter, delete it */ @@ -9109,6 +9547,9 @@ static void i40e_link_event(struct i40e_pf *pf) u8 new_link_speed, old_link_speed; i40e_status status; bool new_link, old_link; +#ifdef CONFIG_I40E_DCB + int err; +#endif /* CONFIG_I40E_DCB */ /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; @@ -9152,6 +9593,31 @@ static void i40e_link_event(struct i40e_pf *pf) if (pf->flags & I40E_FLAG_PTP) i40e_ptp_set_increment(pf); +#ifdef CONFIG_I40E_DCB + if (new_link == old_link) + return; + /* Not SW DCB so firmware will take care of default settings */ + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + return; + + /* We cover here only link down, as after link up in case of SW DCB + * SW LLDP agent will take care of setting it up + */ + if (!new_link) { + dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n"); + memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg)); + err = i40e_dcb_sw_default_config(pf); + if (err) { + pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | + I40E_FLAG_DCB_ENABLED); + } else { + pf->dcbx_cap = DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + pf->flags |= I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_ENABLED; 
+ } + } +#endif /* CONFIG_I40E_DCB */ } /** @@ -9228,7 +9694,7 @@ static void i40e_reset_subtask(struct i40e_pf *pf) * precedence before starting a new reset sequence. */ if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); i40e_reset(pf); i40e_rebuild(pf, false, false); } @@ -9360,7 +9826,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) switch (opcode) { case i40e_aqc_opc_get_link_status: + rtnl_lock(); i40e_handle_link_event(pf, &event); + rtnl_unlock(); break; case i40e_aqc_opc_send_msg_to_pf: ret = i40e_vc_process_vf_msg(pf, @@ -9374,7 +9842,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n"); #ifdef CONFIG_I40E_DCB rtnl_lock(); - ret = i40e_handle_lldp_event(pf, &event); + i40e_handle_lldp_event(pf, &event); rtnl_unlock(); #endif /* CONFIG_I40E_DCB */ break; @@ -9860,12 +10328,10 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure - * @lock_acquired: indicates whether or not the lock has been acquired - * before this function was called. * * Close up the VFs and other things in prep for PF Reset. 
**/ -static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) +static void i40e_prep_for_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; i40e_status ret = 0; @@ -9880,12 +10346,7 @@ static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); /* quiesce the VSIs and their queues that are not already DOWN */ - /* pf_quiesce_all_vsi modifies netdev structures -rtnl_lock needed */ - if (!lock_acquired) - rtnl_lock(); i40e_pf_quiesce_all_vsi(pf); - if (!lock_acquired) - rtnl_unlock(); for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) @@ -10097,24 +10558,41 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) goto end_core_reset; } - /* Enable FW to write a default DCB config on link-up */ - i40e_aq_set_dcb_parameters(hw, true, NULL); - -#ifdef CONFIG_I40E_DCB - ret = i40e_init_pf_dcb(pf); - if (ret) { - dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; - /* Continue without DCB enabled */ - } -#endif /* CONFIG_I40E_DCB */ - /* do basic switch setup */ if (!lock_acquired) rtnl_lock(); ret = i40e_setup_pf_switch(pf, reinit); if (ret) goto end_unlock; +#ifdef CONFIG_I40E_DCB + /* Enable FW to write a default DCB config on link-up + * unless I40E_FLAG_TC_MQPRIO was enabled or DCB + * is not supported with new link speed + */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) { + i40e_aq_set_dcb_parameters(hw, false, NULL); + } else { + if (I40E_IS_X710TL_DEVICE(hw->device_id) && + (hw->phy.link_info.link_speed & + (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { + i40e_aq_set_dcb_parameters(hw, false, NULL); + dev_warn(&pf->pdev->dev, + "DCB is not supported for X710-T*L 2.5/5G speeds\n"); + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + } else { + i40e_aq_set_dcb_parameters(hw, true, NULL); + ret = i40e_init_pf_dcb(pf); + if (ret) { + dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", + 
ret); + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + /* Continue without DCB enabled */ + } + } + } + +#endif /* CONFIG_I40E_DCB */ + /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. */ @@ -10251,6 +10729,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) */ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); +#ifdef CONFIG_I40E_DCB + if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) + i40e_set_lldp_forwarding(pf, true); +#endif /* CONFIG_I40E_DCB */ /* restart the VSIs that were rebuilt and running before the reset */ i40e_pf_unquiesce_all_vsi(pf); @@ -10317,7 +10799,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, **/ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired) { - i40e_prep_for_reset(pf, lock_acquired); + i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, false, lock_acquired); } @@ -11651,7 +12133,7 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) u16 qcount; vsi->req_queue_pairs = queue_count; - i40e_prep_for_reset(pf, true); + i40e_prep_for_reset(pf); pf->alloc_rss_size = new_rss_size; @@ -12448,9 +12930,10 @@ out_err: * i40e_xdp_setup - add/remove an XDP program * @vsi: VSI to changed * @prog: XDP program + * @extack: netlink extended ack **/ -static int i40e_xdp_setup(struct i40e_vsi *vsi, - struct bpf_prog *prog) +static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct i40e_pf *pf = vsi->back; @@ -12459,8 +12942,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, int i; /* Don't allow frames that span over multiple buffers */ - if (frame_size > vsi->rx_buf_len) + if (frame_size > vsi->rx_buf_len) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); return -EINVAL; + } if (!i40e_enabled_xdp_vsi(vsi) && !prog) return 0; @@ -12469,7 
+12954,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); if (need_reset) - i40e_prep_for_reset(pf, true); + i40e_prep_for_reset(pf); old_prog = xchg(&vsi->xdp_prog, prog); @@ -12769,7 +13254,7 @@ static int i40e_xdp(struct net_device *dev, switch (xdp->command) { case XDP_SETUP_PROG: - return i40e_xdp_setup(vsi, xdp->prog); + return i40e_xdp_setup(vsi, xdp->prog, xdp->extack); case XDP_SETUP_XSK_POOL: return i40e_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); @@ -14704,6 +15189,10 @@ err_switch_setup: static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct i40e_aq_get_phy_abilities_resp abilities; +#ifdef CONFIG_I40E_DCB + enum i40e_get_fw_lldp_status_resp lldp_status; + i40e_status status; +#endif /* CONFIG_I40E_DCB */ struct i40e_pf *pf; struct i40e_hw *hw; static u16 pfs_found; @@ -14960,6 +15449,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, pf); pci_save_state(pdev); +#ifdef CONFIG_I40E_DCB + status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status); + (!status && + lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ? + (pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) : + (pf->flags |= I40E_FLAG_DISABLE_FW_LLDP); dev_info(&pdev->dev, (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ? 
"FW LLDP is disabled\n" : @@ -14968,7 +15463,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Enable FW to write default DCB config on link-up */ i40e_aq_set_dcb_parameters(hw, true, NULL); -#ifdef CONFIG_I40E_DCB err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); @@ -15261,6 +15755,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); +#ifdef CONFIG_I40E_DCB + if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) + i40e_set_lldp_forwarding(pf, true); +#endif /* CONFIG_I40E_DCB */ if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) @@ -15463,7 +15961,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, pf->state)) - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); /* Request a slot reset */ return PCI_ERS_RESULT_NEED_RESET; @@ -15513,7 +16011,7 @@ static void i40e_pci_error_reset_prepare(struct pci_dev *pdev) { struct i40e_pf *pf = pci_get_drvdata(pdev); - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); } /** @@ -15617,7 +16115,7 @@ static void i40e_shutdown(struct pci_dev *pdev) if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); - i40e_prep_for_reset(pf, false); + i40e_prep_for_reset(pf); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); @@ -15676,7 +16174,7 @@ static int __maybe_unused i40e_suspend(struct device *dev) */ rtnl_lock(); - i40e_prep_for_reset(pf, true); + i40e_prep_for_reset(pf); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? 
I40E_PFPM_WUFC_MAG_MASK : 0)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 5c1378641b3b..aaea297640e0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. */ #ifndef _I40E_PROTOTYPE_H_ #define _I40E_PROTOTYPE_H_ @@ -200,6 +200,10 @@ i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type, u8 mib_type, void *buff, u16 buff_size, u16 *local_len, u16 *remote_len, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code +i40e_aq_set_lldp_mib(struct i40e_hw *hw, + u8 mib_type, void *buff, u16 buff_size, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw, bool enable_update, struct i40e_asq_cmd_details *cmd_details); @@ -289,6 +293,9 @@ i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, u8 filter_count); i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw, struct i40e_lldp_variables *lldp_cfg); +enum i40e_status_code +i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid, + struct i40e_asq_cmd_details *cmd_details); /* i40e_common */ i40e_status i40e_init_shared_code(struct i40e_hw *hw); i40e_status i40e_pf_reset(struct i40e_hw *hw); diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 564df22f3f46..36f7b27a04ae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #ifndef _I40E_REGISTER_H_ #define _I40E_REGISTER_H_ @@ -34,12 +34,137 @@ #define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31 #define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ATQLEN_ATQENABLE_SHIFT) #define I40E_PF_ATQT 0x00080400 /* Reset: EMPR */ +#define I40E_PRT_SWR_PM_THR 0x0026CD00 /* Reset: CORER */ +#define I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT 0 +#define I40E_PRT_SWR_PM_THR_THRESHOLD_MASK I40E_MASK(0xFF, I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT) +#define I40E_PRTDCB_FCCFG 0x001E4640 /* Reset: GLOBR */ +#define I40E_PRTDCB_FCCFG_TFCE_SHIFT 3 +#define I40E_PRTDCB_FCCFG_TFCE_MASK I40E_MASK(0x3, I40E_PRTDCB_FCCFG_TFCE_SHIFT) #define I40E_PRTDCB_GENC 0x00083000 /* Reset: CORER */ +#define I40E_PRTDCB_GENC_NUMTC_SHIFT 2 +#define I40E_PRTDCB_GENC_NUMTC_MASK I40E_MASK(0xF, I40E_PRTDCB_GENC_NUMTC_SHIFT) #define I40E_PRTDCB_GENC_PFCLDA_SHIFT 16 #define I40E_PRTDCB_GENC_PFCLDA_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_GENC_PFCLDA_SHIFT) #define I40E_PRTDCB_GENS 0x00083020 /* Reset: CORER */ #define I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT 0 #define I40E_PRTDCB_GENS_DCBX_STATUS_MASK I40E_MASK(0x7, I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT) +#define I40E_PRTDCB_MFLCN 0x001E2400 /* Reset: GLOBR */ +#define I40E_PRTDCB_MFLCN_PMCF_SHIFT 0 +#define I40E_PRTDCB_MFLCN_PMCF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_PMCF_SHIFT) +#define I40E_PRTDCB_MFLCN_DPF_SHIFT 1 +#define I40E_PRTDCB_MFLCN_DPF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_DPF_SHIFT) +#define I40E_PRTDCB_MFLCN_RPFCM_SHIFT 2 +#define I40E_PRTDCB_MFLCN_RPFCM_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RPFCM_SHIFT) +#define I40E_PRTDCB_MFLCN_RFCE_SHIFT 3 +#define I40E_PRTDCB_MFLCN_RFCE_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RFCE_SHIFT) +#define I40E_PRTDCB_MFLCN_RPFCE_SHIFT 4 +#define I40E_PRTDCB_MFLCN_RPFCE_MASK I40E_MASK(0xFF, I40E_PRTDCB_MFLCN_RPFCE_SHIFT) +#define I40E_PRTDCB_RETSC 0x001223E0 /* Reset: CORER */ +#define I40E_PRTDCB_RETSC_ETS_MODE_SHIFT 0 +#define I40E_PRTDCB_RETSC_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_ETS_MODE_SHIFT) 
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT 1 +#define I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT) +#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT 2 +#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK I40E_MASK(0xF, I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT) +#define I40E_PRTDCB_RETSC_LLTC_SHIFT 8 +#define I40E_PRTDCB_RETSC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_RETSC_LLTC_SHIFT) +#define I40E_PRTDCB_RETSTCC(_i) (0x00122180 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTDCB_RETSTCC_MAX_INDEX 7 +#define I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT 0 +#define I40E_PRTDCB_RETSTCC_BWSHARE_MASK I40E_MASK(0x7F, I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT) +#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30 +#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT) +#define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31 +#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1u, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT) +#define I40E_PRTDCB_RPPMC 0x001223A0 /* Reset: CORER */ +#define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0 +#define I40E_PRTDCB_RPPMC_LANRPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT) +#define I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT 8 +#define I40E_PRTDCB_RPPMC_RDMARPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT) +#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT 16 +#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT) +#define I40E_PRTDCB_RUP 0x001C0B00 /* Reset: CORER */ +#define I40E_PRTDCB_RUP_NOVLANUP_SHIFT 0 +#define I40E_PRTDCB_RUP_NOVLANUP_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP_NOVLANUP_SHIFT) +#define I40E_PRTDCB_RUP2TC 0x001C09A0 /* Reset: CORER */ +#define I40E_PRTDCB_RUP2TC_UP0TC_SHIFT 0 +#define I40E_PRTDCB_RUP2TC_UP0TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP0TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP1TC_SHIFT 3 +#define I40E_PRTDCB_RUP2TC_UP1TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP1TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP2TC_SHIFT 6 +#define 
I40E_PRTDCB_RUP2TC_UP2TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP2TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP3TC_SHIFT 9 +#define I40E_PRTDCB_RUP2TC_UP3TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP3TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP4TC_SHIFT 12 +#define I40E_PRTDCB_RUP2TC_UP4TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP4TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP5TC_SHIFT 15 +#define I40E_PRTDCB_RUP2TC_UP5TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP5TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP6TC_SHIFT 18 +#define I40E_PRTDCB_RUP2TC_UP6TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP6TC_SHIFT) +#define I40E_PRTDCB_RUP2TC_UP7TC_SHIFT 21 +#define I40E_PRTDCB_RUP2TC_UP7TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP7TC_SHIFT) +#define I40E_PRTDCB_RUPTQ(_i) (0x00122400 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTDCB_RUPTQ_MAX_INDEX 7 +#define I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT 0 +#define I40E_PRTDCB_RUPTQ_RXQNUM_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT) +#define I40E_PRTDCB_TC2PFC 0x001C0980 /* Reset: CORER */ +#define I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT 0 +#define I40E_PRTDCB_TC2PFC_TC2PFC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT) +#define I40E_PRTDCB_TCMSTC(_i) (0x000A0040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTDCB_TCMSTC_MAX_INDEX 7 +#define I40E_PRTDCB_TCMSTC_MSTC_SHIFT 0 +#define I40E_PRTDCB_TCMSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCMSTC_MSTC_SHIFT) +#define I40E_PRTDCB_TCPMC 0x000A21A0 /* Reset: CORER */ +#define I40E_PRTDCB_TCPMC_CPM_SHIFT 0 +#define I40E_PRTDCB_TCPMC_CPM_MASK I40E_MASK(0x1FFF, I40E_PRTDCB_TCPMC_CPM_SHIFT) +#define I40E_PRTDCB_TCPMC_LLTC_SHIFT 13 +#define I40E_PRTDCB_TCPMC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TCPMC_LLTC_SHIFT) +#define I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT 30 +#define I40E_PRTDCB_TCPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT) +#define I40E_PRTDCB_TCWSTC(_i) (0x000A2040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define 
I40E_PRTDCB_TCWSTC_MAX_INDEX 7 +#define I40E_PRTDCB_TCWSTC_MSTC_SHIFT 0 +#define I40E_PRTDCB_TCWSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCWSTC_MSTC_SHIFT) +#define I40E_PRTDCB_TDPMC 0x000A0180 /* Reset: CORER */ +#define I40E_PRTDCB_TDPMC_DPM_SHIFT 0 +#define I40E_PRTDCB_TDPMC_DPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_TDPMC_DPM_SHIFT) +#define I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT 30 +#define I40E_PRTDCB_TDPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT) +#define I40E_PRTDCB_TETSC_TCB 0x000AE060 /* Reset: CORER */ +#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT 0 +#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, \ + I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT) +#define I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT 8 +#define I40E_PRTDCB_TETSC_TCB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT) +#define I40E_PRTDCB_TETSC_TPB 0x00098060 /* Reset: CORER */ +#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT 0 +#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, \ + I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT) +#define I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT 8 +#define I40E_PRTDCB_TETSC_TPB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT) +#define I40E_PRTDCB_TFCS 0x001E4560 /* Reset: GLOBR */ +#define I40E_PRTDCB_TFCS_TXOFF_SHIFT 0 +#define I40E_PRTDCB_TFCS_TXOFF_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF0_SHIFT 8 +#define I40E_PRTDCB_TFCS_TXOFF0_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF0_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF1_SHIFT 9 +#define I40E_PRTDCB_TFCS_TXOFF1_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF1_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF2_SHIFT 10 +#define I40E_PRTDCB_TFCS_TXOFF2_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF2_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF3_SHIFT 11 +#define I40E_PRTDCB_TFCS_TXOFF3_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF3_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF4_SHIFT 12 +#define 
I40E_PRTDCB_TFCS_TXOFF4_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF4_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF5_SHIFT 13 +#define I40E_PRTDCB_TFCS_TXOFF5_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF5_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF6_SHIFT 14 +#define I40E_PRTDCB_TFCS_TXOFF6_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF6_SHIFT) +#define I40E_PRTDCB_TFCS_TXOFF7_SHIFT 15 +#define I40E_PRTDCB_TFCS_TXOFF7_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF7_SHIFT) +#define I40E_PRTDCB_TPFCTS(_i) (0x001E4660 + ((_i) * 32)) /* _i=0...7 */ /* Reset: GLOBR */ +#define I40E_PRTDCB_TPFCTS_MAX_INDEX 7 +#define I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT 0 +#define I40E_PRTDCB_TPFCTS_PFCTIMER_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT) #define I40E_GL_FWSTS 0x00083048 /* Reset: POR */ #define I40E_GL_FWSTS_FWS1B_SHIFT 16 #define I40E_GL_FWSTS_FWS1B_MASK I40E_MASK(0xFF, I40E_GL_FWSTS_FWS1B_SHIFT) @@ -359,6 +484,27 @@ #define I40E_PRTGL_SAL 0x001E2120 /* Reset: GLOBR */ #define I40E_PRTGL_SAL_FC_SAL_SHIFT 0 #define I40E_PRTGL_SAL_FC_SAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTGL_SAL_FC_SAL_SHIFT) +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP 0x001E3260 /* Reset: GLOBR */ +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_SHIFT 0 +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK I40E_MASK(0x1, \ + I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_SHIFT) +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP 0x001E32E0 /* Reset: GLOBR */ +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT 0 +#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_MASK I40E_MASK(0x1, \ + I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT) +#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE 0x001E30C0 /* Reset: GLOBR */ +#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT 0 +#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK I40E_MASK(0x1FF, \ + I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT) +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE 0x001E30D0 /* Reset: GLOBR */ +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT 0 +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK 
I40E_MASK(0x1FF, \ + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT) +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3400 + ((_i) * 16)) /* _i=0...8 */ +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX 8 +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT 0 +#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK I40E_MASK(0xFFFF, \ + I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT) #define I40E_GLNVM_FLA 0x000B6108 /* Reset: POR */ #define I40E_GLNVM_FLA_LOCKED_SHIFT 6 #define I40E_GLNVM_FLA_LOCKED_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_LOCKED_SHIFT) @@ -398,8 +544,34 @@ #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT) #define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31 #define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT) +#define I40E_PRTPM_EEER_TX_LPI_EN_SHIFT 16 +#define I40E_PRTPM_EEER_TX_LPI_EN_MASK I40E_MASK(0x1, I40E_PRTPM_EEER_TX_LPI_EN_SHIFT) #define I40E_PRTPM_RLPIC 0x001E43A0 /* Reset: GLOBR */ #define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */ +#define I40E_PRTRPB_DHW(_i) (0x000AC100 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTRPB_DHW_DHW_TCN_SHIFT 0 +#define I40E_PRTRPB_DHW_DHW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DHW_DHW_TCN_SHIFT) +#define I40E_PRTRPB_DLW(_i) (0x000AC220 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTRPB_DLW_DLW_TCN_SHIFT 0 +#define I40E_PRTRPB_DLW_DLW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DLW_DLW_TCN_SHIFT) +#define I40E_PRTRPB_DPS(_i) (0x000AC320 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTRPB_DPS_DPS_TCN_SHIFT 0 +#define I40E_PRTRPB_DPS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DPS_DPS_TCN_SHIFT) +#define I40E_PRTRPB_SHT(_i) (0x000AC480 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTRPB_SHT_SHT_TCN_SHIFT 0 +#define I40E_PRTRPB_SHT_SHT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHT_SHT_TCN_SHIFT) +#define 
I40E_PRTRPB_SHW 0x000AC580 /* Reset: CORER */ +#define I40E_PRTRPB_SHW_SHW_SHIFT 0 +#define I40E_PRTRPB_SHW_SHW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHW_SHW_SHIFT) +#define I40E_PRTRPB_SLT(_i) (0x000AC5A0 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */ +#define I40E_PRTRPB_SLT_SLT_TCN_SHIFT 0 +#define I40E_PRTRPB_SLT_SLT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLT_SLT_TCN_SHIFT) +#define I40E_PRTRPB_SLW 0x000AC6A0 /* Reset: CORER */ +#define I40E_PRTRPB_SLW_SLW_SHIFT 0 +#define I40E_PRTRPB_SLW_SLW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLW_SLW_SHIFT) +#define I40E_PRTRPB_SPS 0x000AC7C0 /* Reset: CORER */ +#define I40E_PRTRPB_SPS_SPS_SHIFT 0 +#define I40E_PRTRPB_SPS_SPS_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SPS_SPS_SHIFT) #define I40E_GLQF_FDCNT_0 0x00269BAC /* Reset: CORER */ #define I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT 0 #define I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK I40E_MASK(0x1FFF, I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 8d2ea4293d69..3d24c6032616 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -42,9 +42,6 @@ static void i40e_fdir(struct i40e_ring *tx_ring, flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); - flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & - (fdata->flex_offset << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); - /* Use LAN VSI Id if not programmed by user */ flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & ((u32)(fdata->dest_vsi ? 
: pf->vsi[pf->lan_vsi]->id) << @@ -160,59 +157,180 @@ dma_fail: return -1; } -#define IP_HEADER_OFFSET 14 -#define I40E_UDPIP_DUMMY_PACKET_LEN 42 /** - * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters - * @vsi: pointer to the targeted VSI - * @fd_data: the flow director data required for the FDir descriptor - * @add: true adds a filter, false removes it + * i40e_create_dummy_packet - Constructs dummy packet for HW + * @dummy_packet: preallocated space for dummy packet + * @ipv4: is layer 3 packet of version 4 or 6 + * @l4proto: next level protocol used in data portion of l3 + * @data: filter data * - * Returns 0 if the filters were successfully added or removed + * Returns address of layer 4 protocol dummy packet. **/ -static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) +static char *i40e_create_dummy_packet(u8 *dummy_packet, bool ipv4, u8 l4proto, + struct i40e_fdir_filter *data) +{ + bool is_vlan = !!data->vlan_tag; + struct vlan_hdr vlan; + struct ipv6hdr ipv6; + struct ethhdr eth; + struct iphdr ip; + u8 *tmp; + + if (ipv4) { + eth.h_proto = cpu_to_be16(ETH_P_IP); + ip.protocol = l4proto; + ip.version = 0x4; + ip.ihl = 0x5; + + ip.daddr = data->dst_ip; + ip.saddr = data->src_ip; + } else { + eth.h_proto = cpu_to_be16(ETH_P_IPV6); + ipv6.nexthdr = l4proto; + ipv6.version = 0x6; + + memcpy(&ipv6.saddr.in6_u.u6_addr32, data->src_ip6, + sizeof(__be32) * 4); + memcpy(&ipv6.daddr.in6_u.u6_addr32, data->dst_ip6, + sizeof(__be32) * 4); + } + + if (is_vlan) { + vlan.h_vlan_TCI = data->vlan_tag; + vlan.h_vlan_encapsulated_proto = eth.h_proto; + eth.h_proto = data->vlan_etype; + } + + tmp = dummy_packet; + memcpy(tmp, &eth, sizeof(eth)); + tmp += sizeof(eth); + + if (is_vlan) { + memcpy(tmp, &vlan, sizeof(vlan)); + tmp += sizeof(vlan); + } + + if (ipv4) { + memcpy(tmp, &ip, sizeof(ip)); + tmp += sizeof(ip); + } else { + memcpy(tmp, &ipv6, sizeof(ipv6)); + tmp += sizeof(ipv6); + } + + return tmp; +} + +/** + *
i40e_create_dummy_udp_packet - helper function to create UDP packet + * @raw_packet: preallocated space for dummy packet + * @ipv4: is layer 3 packet of version 4 or 6 + * @l4proto: next level protocol used in data portion of l3 + * @data: filter data + * + * Helper function to populate udp fields. + **/ +static void i40e_create_dummy_udp_packet(u8 *raw_packet, bool ipv4, u8 l4proto, + struct i40e_fdir_filter *data) { - struct i40e_pf *pf = vsi->back; struct udphdr *udp; - struct iphdr *ip; - u8 *raw_packet; - int ret; - static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, - 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + u8 *tmp; - raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); - if (!raw_packet) - return -ENOMEM; - memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN); + tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_UDP, data); + udp = (struct udphdr *)(tmp); + udp->dest = data->dst_port; + udp->source = data->src_port; +} + +/** + * i40e_create_dummy_tcp_packet - helper function to create TCP packet + * @raw_packet: preallocated space for dummy packet + * @ipv4: is layer 3 packet of version 4 or 6 + * @l4proto: next level protocol used in data portion of l3 + * @data: filter data + * + * Helper function to populate tcp fields. 
+ **/ +static void i40e_create_dummy_tcp_packet(u8 *raw_packet, bool ipv4, u8 l4proto, + struct i40e_fdir_filter *data) +{ + struct tcphdr *tcp; + u8 *tmp; + /* Dummy tcp packet */ + static const char tcp_packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x50, 0x11, 0x0, 0x72, 0, 0, 0, 0}; + + tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_TCP, data); + + tcp = (struct tcphdr *)tmp; + memcpy(tcp, tcp_packet, sizeof(tcp_packet)); + tcp->dest = data->dst_port; + tcp->source = data->src_port; +} + +/** + * i40e_create_dummy_sctp_packet - helper function to create SCTP packet + * @raw_packet: preallocated space for dummy packet + * @ipv4: is layer 3 packet of version 4 or 6 + * @l4proto: next level protocol used in data portion of l3 + * @data: filter data + * + * Helper function to populate sctp fields. + **/ +static void i40e_create_dummy_sctp_packet(u8 *raw_packet, bool ipv4, + u8 l4proto, + struct i40e_fdir_filter *data) +{ + struct sctphdr *sctp; + u8 *tmp; - ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); - udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET - + sizeof(struct iphdr)); + tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_SCTP, data); - ip->daddr = fd_data->dst_ip; - udp->dest = fd_data->dst_port; - ip->saddr = fd_data->src_ip; - udp->source = fd_data->src_port; + sctp = (struct sctphdr *)tmp; + sctp->dest = data->dst_port; + sctp->source = data->src_port; +} + +/** + * i40e_prepare_fdir_filter - Prepare and program fdir filter + * @pf: physical function to attach filter to + * @fd_data: filter data + * @add: add or delete filter + * @packet_addr: address of dummy packet, used in filtering + * @payload_offset: offset from dummy packet address to user defined data + * @pctype: Packet type for which filter is used + * + * Helper function to offset data of dummy packet, program it and + * handle errors. 
+ **/ +static int i40e_prepare_fdir_filter(struct i40e_pf *pf, + struct i40e_fdir_filter *fd_data, + bool add, char *packet_addr, + int payload_offset, u8 pctype) +{ + int ret; if (fd_data->flex_filter) { - u8 *payload = raw_packet + I40E_UDPIP_DUMMY_PACKET_LEN; + u8 *payload; __be16 pattern = fd_data->flex_word; u16 off = fd_data->flex_offset; + payload = packet_addr + payload_offset; + + /* If user provided vlan, offset payload by vlan header length */ + if (!!fd_data->vlan_tag) + payload += VLAN_HLEN; + *((__force __be16 *)(payload + off)) = pattern; } - fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; - ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + fd_data->pctype = pctype; + ret = i40e_program_fdir_filter(fd_data, packet_addr, pf, add); if (ret) { dev_info(&pf->pdev->dev, "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", fd_data->pctype, fd_data->fd_id, ret); /* Free the packet buffer since it wasn't added to the ring */ - kfree(raw_packet); return -EOPNOTSUPP; } else if (I40E_DEBUG_FD & pf->hw.debug_mask) { if (add) @@ -225,238 +343,243 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, fd_data->pctype, fd_data->fd_id); } - if (add) - pf->fd_udp4_filter_cnt++; - else - pf->fd_udp4_filter_cnt--; + return ret; +} - return 0; +/** + * i40e_change_filter_num - Prepare and program fdir filter + * @ipv4: is layer 3 packet of version 4 or 6 + * @add: add or delete filter + * @ipv4_filter_num: field to update + * @ipv6_filter_num: field to update + * + * Update filter number field for pf. 
+ **/ +static void i40e_change_filter_num(bool ipv4, bool add, u16 *ipv4_filter_num, + u16 *ipv6_filter_num) +{ + if (add) { + if (ipv4) + (*ipv4_filter_num)++; + else + (*ipv6_filter_num)++; + } else { + if (ipv4) + (*ipv4_filter_num)--; + else + (*ipv6_filter_num)--; + } } -#define I40E_TCPIP_DUMMY_PACKET_LEN 54 +#define IP_HEADER_OFFSET 14 +#define I40E_UDPIP_DUMMY_PACKET_LEN 42 +#define I40E_UDPIP6_DUMMY_PACKET_LEN 62 /** - * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters + * i40e_add_del_fdir_udp - Add/Remove UDP filters * @vsi: pointer to the targeted VSI * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it + * @ipv4: true is v4, false is v6 * * Returns 0 if the filters were successfully added or removed **/ -static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) +static int i40e_add_del_fdir_udp(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + bool add, + bool ipv4) { struct i40e_pf *pf = vsi->back; - struct tcphdr *tcp; - struct iphdr *ip; u8 *raw_packet; int ret; - /* Dummy packet */ - static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, - 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11, - 0x0, 0x72, 0, 0, 0, 0}; raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); if (!raw_packet) return -ENOMEM; - memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN); - - ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); - tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET - + sizeof(struct iphdr)); - ip->daddr = fd_data->dst_ip; - tcp->dest = fd_data->dst_port; - ip->saddr = fd_data->src_ip; - tcp->source = fd_data->src_port; + i40e_create_dummy_udp_packet(raw_packet, ipv4, IPPROTO_UDP, fd_data); - if (fd_data->flex_filter) { - u8 *payload = raw_packet + I40E_TCPIP_DUMMY_PACKET_LEN; - __be16 pattern = fd_data->flex_word; - u16 off = 
fd_data->flex_offset; + if (ipv4) + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_UDPIP_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV4_UDP); + else + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_UDPIP6_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV6_UDP); - *((__force __be16 *)(payload + off)) = pattern; + if (ret) { + kfree(raw_packet); + return ret; } - fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; - ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + i40e_change_filter_num(ipv4, add, &pf->fd_udp4_filter_cnt, + &pf->fd_udp6_filter_cnt); + + return 0; +} + +#define I40E_TCPIP_DUMMY_PACKET_LEN 54 +#define I40E_TCPIP6_DUMMY_PACKET_LEN 74 +/** + * i40e_add_del_fdir_tcp - Add/Remove TCPv4 filters + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @add: true adds a filter, false removes it + * @ipv4: true is v4, false is v6 + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_tcp(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + bool add, + bool ipv4) +{ + struct i40e_pf *pf = vsi->back; + u8 *raw_packet; + int ret; + + raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); + if (!raw_packet) + return -ENOMEM; + + i40e_create_dummy_tcp_packet(raw_packet, ipv4, IPPROTO_TCP, fd_data); + if (ipv4) + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_TCPIP_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV4_TCP); + else + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_TCPIP6_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV6_TCP); + if (ret) { - dev_info(&pf->pdev->dev, - "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", - fd_data->pctype, fd_data->fd_id, ret); - /* Free the packet buffer since it wasn't added to the ring */ kfree(raw_packet); - return -EOPNOTSUPP; - } else if (I40E_DEBUG_FD & pf->hw.debug_mask) { - 
if (add) - dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n", - fd_data->pctype, fd_data->fd_id); - else - dev_info(&pf->pdev->dev, - "Filter deleted for PCTYPE %d loc = %d\n", - fd_data->pctype, fd_data->fd_id); + return ret; } + i40e_change_filter_num(ipv4, add, &pf->fd_tcp4_filter_cnt, + &pf->fd_tcp6_filter_cnt); + if (add) { - pf->fd_tcp4_filter_cnt++; if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); - } else { - pf->fd_tcp4_filter_cnt--; } - return 0; } -#define I40E_SCTPIP_DUMMY_PACKET_LEN 46 +#define I40E_SCTPIP_DUMMY_PACKET_LEN 46 +#define I40E_SCTPIP6_DUMMY_PACKET_LEN 66 /** - * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for + * i40e_add_del_fdir_sctp - Add/Remove SCTPv4 Flow Director filters for * a specific flow spec * @vsi: pointer to the targeted VSI * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it + * @ipv4: true is v4, false is v6 * * Returns 0 if the filters were successfully added or removed **/ -static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) +static int i40e_add_del_fdir_sctp(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + bool add, + bool ipv4) { struct i40e_pf *pf = vsi->back; - struct sctphdr *sctp; - struct iphdr *ip; u8 *raw_packet; int ret; - /* Dummy packet */ - static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, - 0x45, 0, 0, 0x20, 0, 0, 0x40, 0, 0x40, 0x84, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); if (!raw_packet) return -ENOMEM; - memcpy(raw_packet, packet, I40E_SCTPIP_DUMMY_PACKET_LEN); - ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); - sctp = (struct sctphdr *)(raw_packet + 
IP_HEADER_OFFSET - + sizeof(struct iphdr)); + i40e_create_dummy_sctp_packet(raw_packet, ipv4, IPPROTO_SCTP, fd_data); - ip->daddr = fd_data->dst_ip; - sctp->dest = fd_data->dst_port; - ip->saddr = fd_data->src_ip; - sctp->source = fd_data->src_port; - - if (fd_data->flex_filter) { - u8 *payload = raw_packet + I40E_SCTPIP_DUMMY_PACKET_LEN; - __be16 pattern = fd_data->flex_word; - u16 off = fd_data->flex_offset; - - *((__force __be16 *)(payload + off)) = pattern; - } + if (ipv4) + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_SCTPIP_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV4_SCTP); + else + ret = i40e_prepare_fdir_filter + (pf, fd_data, add, raw_packet, + I40E_SCTPIP6_DUMMY_PACKET_LEN, + I40E_FILTER_PCTYPE_NONF_IPV6_SCTP); - fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP; - ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); if (ret) { - dev_info(&pf->pdev->dev, - "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", - fd_data->pctype, fd_data->fd_id, ret); - /* Free the packet buffer since it wasn't added to the ring */ kfree(raw_packet); - return -EOPNOTSUPP; - } else if (I40E_DEBUG_FD & pf->hw.debug_mask) { - if (add) - dev_info(&pf->pdev->dev, - "Filter OK for PCTYPE %d loc = %d\n", - fd_data->pctype, fd_data->fd_id); - else - dev_info(&pf->pdev->dev, - "Filter deleted for PCTYPE %d loc = %d\n", - fd_data->pctype, fd_data->fd_id); + return ret; } - if (add) - pf->fd_sctp4_filter_cnt++; - else - pf->fd_sctp4_filter_cnt--; + i40e_change_filter_num(ipv4, add, &pf->fd_sctp4_filter_cnt, + &pf->fd_sctp6_filter_cnt); return 0; } -#define I40E_IP_DUMMY_PACKET_LEN 34 +#define I40E_IP_DUMMY_PACKET_LEN 34 +#define I40E_IP6_DUMMY_PACKET_LEN 54 /** - * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for + * i40e_add_del_fdir_ip - Add/Remove IPv4 Flow Director filters for * a specific flow spec * @vsi: pointer to the targeted VSI * @fd_data: the flow director data required for the FDir descriptor * @add: 
true adds a filter, false removes it + * @ipv4: true is v4, false is v6 * * Returns 0 if the filters were successfully added or removed **/ -static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, - struct i40e_fdir_filter *fd_data, - bool add) +static int i40e_add_del_fdir_ip(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + bool add, + bool ipv4) { struct i40e_pf *pf = vsi->back; - struct iphdr *ip; + int payload_offset; u8 *raw_packet; + int iter_start; + int iter_end; int ret; int i; - static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, - 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0}; - for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; - i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) { + if (ipv4) { + iter_start = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; + iter_end = I40E_FILTER_PCTYPE_FRAG_IPV4; + } else { + iter_start = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER; + iter_end = I40E_FILTER_PCTYPE_FRAG_IPV6; + } + + for (i = iter_start; i <= iter_end; i++) { raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); if (!raw_packet) return -ENOMEM; - memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN); - ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); - ip->saddr = fd_data->src_ip; - ip->daddr = fd_data->dst_ip; - ip->protocol = 0; + /* IPv6 no header option differs from IPv4 */ + (void)i40e_create_dummy_packet + (raw_packet, ipv4, (ipv4) ? IPPROTO_IP : IPPROTO_NONE, + fd_data); - if (fd_data->flex_filter) { - u8 *payload = raw_packet + I40E_IP_DUMMY_PACKET_LEN; - __be16 pattern = fd_data->flex_word; - u16 off = fd_data->flex_offset; - - *((__force __be16 *)(payload + off)) = pattern; - } - - fd_data->pctype = i; - ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); - if (ret) { - dev_info(&pf->pdev->dev, - "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n", - fd_data->pctype, fd_data->fd_id, ret); - /* The packet buffer wasn't added to the ring so we - * need to free it now. 
- */ - kfree(raw_packet); - return -EOPNOTSUPP; - } else if (I40E_DEBUG_FD & pf->hw.debug_mask) { - if (add) - dev_info(&pf->pdev->dev, - "Filter OK for PCTYPE %d loc = %d\n", - fd_data->pctype, fd_data->fd_id); - else - dev_info(&pf->pdev->dev, - "Filter deleted for PCTYPE %d loc = %d\n", - fd_data->pctype, fd_data->fd_id); - } + payload_offset = (ipv4) ? I40E_IP_DUMMY_PACKET_LEN : + I40E_IP6_DUMMY_PACKET_LEN; + ret = i40e_prepare_fdir_filter(pf, fd_data, add, raw_packet, + payload_offset, i); + if (ret) + goto err; } - if (add) - pf->fd_ip4_filter_cnt++; - else - pf->fd_ip4_filter_cnt--; + i40e_change_filter_num(ipv4, add, &pf->fd_ip4_filter_cnt, + &pf->fd_ip6_filter_cnt); return 0; +err: + kfree(raw_packet); + return ret; } /** @@ -469,37 +592,68 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add) { + enum ip_ver { ipv6 = 0, ipv4 = 1 }; struct i40e_pf *pf = vsi->back; int ret; switch (input->flow_type & ~FLOW_EXT) { case TCP_V4_FLOW: - ret = i40e_add_del_fdir_tcpv4(vsi, input, add); + ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv4); break; case UDP_V4_FLOW: - ret = i40e_add_del_fdir_udpv4(vsi, input, add); + ret = i40e_add_del_fdir_udp(vsi, input, add, ipv4); break; case SCTP_V4_FLOW: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); + ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv4); + break; + case TCP_V6_FLOW: + ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv6); + break; + case UDP_V6_FLOW: + ret = i40e_add_del_fdir_udp(vsi, input, add, ipv6); + break; + case SCTP_V6_FLOW: + ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv6); break; case IP_USER_FLOW: - switch (input->ip4_proto) { + switch (input->ipl4_proto) { case IPPROTO_TCP: - ret = i40e_add_del_fdir_tcpv4(vsi, input, add); + ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv4); break; case IPPROTO_UDP: - ret = i40e_add_del_fdir_udpv4(vsi, input, add); + ret = i40e_add_del_fdir_udp(vsi, input, add, ipv4); 
break; case IPPROTO_SCTP: - ret = i40e_add_del_fdir_sctpv4(vsi, input, add); + ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv4); break; case IPPROTO_IP: - ret = i40e_add_del_fdir_ipv4(vsi, input, add); + ret = i40e_add_del_fdir_ip(vsi, input, add, ipv4); break; default: /* We cannot support masking based on protocol */ dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n", - input->ip4_proto); + input->ipl4_proto); + return -EINVAL; + } + break; + case IPV6_USER_FLOW: + switch (input->ipl4_proto) { + case IPPROTO_TCP: + ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv6); + break; + case IPPROTO_UDP: + ret = i40e_add_del_fdir_udp(vsi, input, add, ipv6); + break; + case IPPROTO_SCTP: + ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv6); + break; + case IPPROTO_IP: + ret = i40e_add_del_fdir_ip(vsi, input, add, ipv6); + break; + default: + /* We cannot support masking based on protocol */ + dev_info(&pf->pdev->dev, "Unsupported IPv6 protocol 0x%02x\n", + input->ipl4_proto); return -EINVAL; } break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index c0bdc666f557..5c10faaca790 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ +/* Copyright(c) 2013 - 2021 Intel Corporation. 
*/ #ifndef _I40E_TYPE_H_ #define _I40E_TYPE_H_ @@ -517,6 +517,7 @@ struct i40e_dcbx_config { #define I40E_DCBX_MODE_CEE 0x1 #define I40E_DCBX_MODE_IEEE 0x2 u8 app_mode; +#define I40E_DCBX_APPS_NON_WILLING 0x1 u32 numapps; u32 tlv_status; /* CEE mode TLV status */ struct i40e_dcb_ets_config etscfg; @@ -1420,6 +1421,8 @@ struct i40e_lldp_variables { #define I40E_L4_DST_MASK (0x1ULL << I40E_L4_DST_SHIFT) #define I40E_VERIFY_TAG_SHIFT 31 #define I40E_VERIFY_TAG_MASK (0x3ULL << I40E_VERIFY_TAG_SHIFT) +#define I40E_VLAN_SRC_SHIFT 55 +#define I40E_VLAN_SRC_MASK (0x1ULL << I40E_VLAN_SRC_SHIFT) #define I40E_FLEX_50_SHIFT 13 #define I40E_FLEX_50_MASK (0x1ULL << I40E_FLEX_50_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 492ce213208d..470b8600adb1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -250,27 +250,68 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, xdp->data_end - xdp->data_hard_start, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) - return NULL; + goto out; skb_reserve(skb, xdp->data - xdp->data_hard_start); memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); +out: xsk_buff_free(xdp); return skb; } -/** - * i40e_inc_ntc: Advance the next_to_clean index - * @rx_ring: Rx ring - **/ -static void i40e_inc_ntc(struct i40e_ring *rx_ring) +static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, + struct xdp_buff *xdp_buff, + union i40e_rx_desc *rx_desc, + unsigned int *rx_packets, + unsigned int *rx_bytes, + unsigned int size, + unsigned int xdp_res) { - u32 ntc = rx_ring->next_to_clean + 1; + struct sk_buff *skb; + + *rx_packets = 1; + *rx_bytes = size; + + if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX) + return; + + if (xdp_res == I40E_XDP_CONSUMED) { + xsk_buff_free(xdp_buff); + return; + } + + if (xdp_res == I40E_XDP_PASS) { + /* NB! 
We are not checking for errors using + * i40e_test_staterr with + * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that + * SBP is *not* set in PRT_SBPVSI (default not set). + */ + skb = i40e_construct_skb_zc(rx_ring, xdp_buff); + if (!skb) { + rx_ring->rx_stats.alloc_buff_failed++; + *rx_packets = 0; + *rx_bytes = 0; + return; + } - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; + if (eth_skb_pad(skb)) { + *rx_packets = 0; + *rx_bytes = 0; + return; + } + + *rx_bytes = skb->len; + i40e_process_skb_fields(rx_ring, rx_desc, skb); + napi_gro_receive(&rx_ring->q_vector->napi, skb); + return; + } + + /* Should never get here, as all valid cases have been handled already. + */ + WARN_ON_ONCE(1); } /** @@ -284,17 +325,20 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); + u16 next_to_clean = rx_ring->next_to_clean; + u16 count_mask = rx_ring->count - 1; unsigned int xdp_res, xdp_xmit = 0; bool failure = false; - struct sk_buff *skb; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; - struct xdp_buff **bi; + unsigned int rx_packets; + unsigned int rx_bytes; + struct xdp_buff *bi; unsigned int size; u64 qword; - rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); + rx_desc = I40E_RX_DESC(rx_ring, next_to_clean); qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); /* This memory barrier is needed to keep us from reading @@ -307,11 +351,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) i40e_clean_programming_status(rx_ring, rx_desc->raw.qword[0], qword); - bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - xsk_buff_free(*bi); - *bi = NULL; - cleaned_count++; - i40e_inc_ntc(rx_ring); + bi = *i40e_rx_bi(rx_ring, next_to_clean); + xsk_buff_free(bi); + next_to_clean = (next_to_clean + 1) & count_mask; continue; } @@ -320,53 +362,22 @@ int i40e_clean_rx_irq_zc(struct 
i40e_ring *rx_ring, int budget) if (!size) break; - bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - (*bi)->data_end = (*bi)->data + size; - xsk_buff_dma_sync_for_cpu(*bi, rx_ring->xsk_pool); - - xdp_res = i40e_run_xdp_zc(rx_ring, *bi); - if (xdp_res) { - if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) - xdp_xmit |= xdp_res; - else - xsk_buff_free(*bi); - - *bi = NULL; - total_rx_bytes += size; - total_rx_packets++; - - cleaned_count++; - i40e_inc_ntc(rx_ring); - continue; - } - - /* XDP_PASS path */ - - /* NB! We are not checking for errors using - * i40e_test_staterr with - * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that - * SBP is *not* set in PRT_SBPVSI (default not set). - */ - skb = i40e_construct_skb_zc(rx_ring, *bi); - if (!skb) { - rx_ring->rx_stats.alloc_buff_failed++; - break; - } - - *bi = NULL; - cleaned_count++; - i40e_inc_ntc(rx_ring); - - if (eth_skb_pad(skb)) - continue; - - total_rx_bytes += skb->len; - total_rx_packets++; - - i40e_process_skb_fields(rx_ring, rx_desc, skb); - napi_gro_receive(&rx_ring->q_vector->napi, skb); + bi = *i40e_rx_bi(rx_ring, next_to_clean); + bi->data_end = bi->data + size; + xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool); + + xdp_res = i40e_run_xdp_zc(rx_ring, bi); + i40e_handle_xdp_result_zc(rx_ring, bi, rx_desc, &rx_packets, + &rx_bytes, size, xdp_res); + total_rx_packets += rx_packets; + total_rx_bytes += rx_bytes; + xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR); + next_to_clean = (next_to_clean + 1) & count_mask; } + rx_ring->next_to_clean = next_to_clean; + cleaned_count = (next_to_clean - rx_ring->next_to_use - 1) & count_mask; + if (cleaned_count >= I40E_RX_BUFFER_WRITE) failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); @@ -374,7 +385,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { - if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + if 
(failure || next_to_clean == rx_ring->next_to_use) xsk_set_rx_need_wakeup(rx_ring->xsk_pool); else xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); @@ -604,16 +615,14 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) { - u16 i; - - for (i = 0; i < rx_ring->count; i++) { - struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, i); + u16 count_mask = rx_ring->count - 1; + u16 ntc = rx_ring->next_to_clean; + u16 ntu = rx_ring->next_to_use; - if (!rx_bi) - continue; + for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, ntc); xsk_buff_free(rx_bi); - rx_bi = NULL; } } diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 6da4f43f2348..73da4f71f530 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -24,6 +24,7 @@ ice-y := ice_main.o \ ice_flow.o \ ice_devlink.o \ ice_fw_update.o \ + ice_lag.o \ ice_ethtool.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index fca428c879ec..dae8280ce17c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -39,6 +39,7 @@ #include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> +#include <net/xdp_sock_drv.h> #include <net/geneve.h> #include <net/gre.h> #include <net/udp_tunnel.h> @@ -55,6 +56,7 @@ #include "ice_fdir.h" #include "ice_xsk.h" #include "ice_arfs.h" +#include "ice_lag.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -326,9 +328,11 @@ struct ice_vsi { struct ice_ring **xdp_rings; /* XDP ring array */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ - struct xsk_buff_pool **xsk_pools; - u16 num_xsk_pools_used; - u16 num_xsk_pools; + + /* setup back reference, 
to which aggregator node this VSI + * corresponds to + */ + struct ice_agg_node *agg_node; } ____cacheline_internodealigned_in_smp; /* struct that defines an interrupt vector */ @@ -377,6 +381,13 @@ enum ice_pf_flags { ICE_PF_FLAGS_NBITS /* must be last */ }; +struct ice_agg_node { + u32 agg_id; +#define ICE_MAX_VSIS_IN_AGG_NODE 64 + u32 num_vsis; + u8 valid; +}; + struct ice_pf { struct pci_dev *pdev; @@ -455,6 +466,15 @@ struct ice_pf { __le64 nvm_phy_type_lo; /* NVM PHY type low */ __le64 nvm_phy_type_hi; /* NVM PHY type high */ struct ice_link_default_override_tlv link_dflt_override; + struct ice_lag *lag; /* Link Aggregation information */ + +#define ICE_INVALID_AGG_NODE_ID 0 +#define ICE_PF_AGG_NODE_ID_START 1 +#define ICE_MAX_PF_AGG_NODES 32 + struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES]; +#define ICE_VF_AGG_NODE_ID_START 65 +#define ICE_MAX_VF_AGG_NODES 32 + struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; }; struct ice_netdev_priv { @@ -517,17 +537,15 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring) */ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring) { - struct xsk_buff_pool **pools = ring->vsi->xsk_pools; u16 qid = ring->q_index; if (ice_ring_is_xdp(ring)) qid -= ring->vsi->num_xdp_txq; - if (qid >= ring->vsi->num_xsk_pools || !pools || !pools[qid] || - !ice_is_xdp_ena_vsi(ring->vsi)) + if (!ice_is_xdp_ena_vsi(ring->vsi)) return NULL; - return pools[qid]; + return xsk_get_pool_from_qid(ring->vsi->netdev, qid); } /** @@ -557,11 +575,31 @@ static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf) return pf->vsi[pf->ctrl_vsi_idx]; } +/** + * ice_set_sriov_cap - enable SRIOV in PF flags + * @pf: PF struct + */ +static inline void ice_set_sriov_cap(struct ice_pf *pf) +{ + if (pf->hw.func_caps.common_cap.sr_iov_1_1) + set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); +} + +/** + * ice_clear_sriov_cap - disable SRIOV in PF flags + * @pf: PF struct + */ +static inline void ice_clear_sriov_cap(struct ice_pf *pf) 
+{ + clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); +} + #define ICE_FD_STAT_CTR_BLOCK_COUNT 256 #define ICE_FD_STAT_PF_IDX(base_idx) \ ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) #define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx) +bool netif_is_ice(struct net_device *dev); int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); int ice_vsi_open_ctrl(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index a51470b68d54..80186589153b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -695,6 +695,18 @@ struct ice_aqc_sched_elem_cmd { __le32 addr_low; }; +struct ice_aqc_txsched_move_grp_info_hdr { + __le32 src_parent_teid; + __le32 dest_parent_teid; + __le16 num_elems; + __le16 reserved; +}; + +struct ice_aqc_move_elem { + struct ice_aqc_txsched_move_grp_info_hdr hdr; + __le32 teid[]; +}; + struct ice_aqc_elem_info_bw { __le16 bw_profile_idx; __le16 bw_alloc; @@ -1528,6 +1540,16 @@ struct ice_aqc_lldp_stop_start_specific_agent { u8 reserved[15]; }; +/* LLDP Filter Control (direct 0x0A0A) */ +struct ice_aqc_lldp_filter_ctrl { + u8 cmd_flags; +#define ICE_AQC_LLDP_FILTER_ACTION_ADD 0x0 +#define ICE_AQC_LLDP_FILTER_ACTION_DELETE 0x1 + u8 reserved1; + __le16 vsi_num; + u8 reserved2[12]; +}; + /* Get/Set RSS key (indirect 0x0B04/0x0B02) */ struct ice_aqc_get_set_rss_key { #define ICE_AQC_GSET_RSS_KEY_VSI_VALID BIT(15) @@ -1851,6 +1873,7 @@ struct ice_aq_desc { struct ice_aqc_lldp_start lldp_start; struct ice_aqc_lldp_set_local_mib lldp_set_mib; struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl; + struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl; struct ice_aqc_get_set_rss_lut get_set_rss_lut; struct ice_aqc_get_set_rss_key get_set_rss_key; struct ice_aqc_add_txqs add_txqs; @@ -1950,6 +1973,7 @@ enum ice_adminq_opc { ice_aqc_opc_add_sched_elems = 0x0401, 
ice_aqc_opc_cfg_sched_elems = 0x0403, ice_aqc_opc_get_sched_elems = 0x0404, + ice_aqc_opc_move_sched_elems = 0x0408, ice_aqc_opc_suspend_sched_elems = 0x0409, ice_aqc_opc_resume_sched_elems = 0x040A, ice_aqc_opc_query_port_ets = 0x040E, @@ -1991,6 +2015,7 @@ enum ice_adminq_opc { ice_aqc_opc_get_cee_dcb_cfg = 0x0A07, ice_aqc_opc_lldp_set_local_mib = 0x0A08, ice_aqc_opc_lldp_stop_start_specific_agent = 0x0A09, + ice_aqc_opc_lldp_filter_ctrl = 0x0A0A, /* RSS commands */ ice_aqc_opc_set_rss_key = 0x0B02, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 607d33d05a0c..3d9475e222cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -110,7 +110,7 @@ ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size, if (status) return status; - resp = (struct ice_aqc_manage_mac_read_resp *)buf; + resp = buf; flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M; if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) { @@ -907,6 +907,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw) ice_debug(hw, ICE_DBG_SCHED, "Failed to get scheduler allocated resources\n"); goto err_unroll_alloc; } + ice_sched_get_psm_clk_freq(hw); /* Initialize port_info struct with scheduler data */ status = ice_sched_init_port(hw->port_info); @@ -1979,7 +1980,7 @@ ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p, struct ice_aqc_list_caps_elem *cap_resp; u32 i; - cap_resp = (struct ice_aqc_list_caps_elem *)buf; + cap_resp = buf; memset(func_p, 0, sizeof(*func_p)); @@ -2109,7 +2110,7 @@ ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p, struct ice_aqc_list_caps_elem *cap_resp; u32 i; - cap_resp = (struct ice_aqc_list_caps_elem *)buf; + cap_resp = buf; memset(dev_p, 0, sizeof(*dev_p)); @@ -4078,6 +4079,7 @@ static enum ice_status ice_replay_pre_init(struct ice_hw *hw) for (i = 0; i < ICE_SW_LKUP_LAST; i++) 
list_replace_init(&sw->recp_list[i].filt_rules, &sw->recp_list[i].filt_replay_rules); + ice_sched_replay_agg_vsi_preinit(hw); return 0; } @@ -4109,6 +4111,8 @@ enum ice_status ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle) return status; /* Replay per VSI all filters */ status = ice_replay_vsi_all_fltr(hw, vsi_handle); + if (!status) + status = ice_replay_vsi_agg(hw, vsi_handle); return status; } @@ -4122,6 +4126,7 @@ void ice_replay_post(struct ice_hw *hw) { /* Delete old entries from replay filter list head */ ice_rm_all_sw_replay_rule_info(hw); + ice_sched_replay_agg(hw); } /** @@ -4366,3 +4371,50 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); } + +/** + * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl + * @hw: pointer to HW struct + */ +bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) +{ + if (hw->mac_type != ICE_MAC_E810) + return false; + + if (hw->api_maj_ver == ICE_FW_API_LLDP_FLTR_MAJ) { + if (hw->api_min_ver > ICE_FW_API_LLDP_FLTR_MIN) + return true; + if (hw->api_min_ver == ICE_FW_API_LLDP_FLTR_MIN && + hw->api_patch >= ICE_FW_API_LLDP_FLTR_PATCH) + return true; + } else if (hw->api_maj_ver > ICE_FW_API_LLDP_FLTR_MAJ) { + return true; + } + return false; +} + +/** + * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter + * @hw: pointer to HW struct + * @vsi_num: absolute HW index for VSI + * @add: boolean for if adding or removing a filter + */ +enum ice_status +ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) +{ + struct ice_aqc_lldp_filter_ctrl *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.lldp_filter_ctrl; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl); + + if (add) + cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_ADD; + else + cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE; + + cmd->vsi_num = cpu_to_le16(vsi_num); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); 
+} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 3ebb973878c7..baf4064fcbfe 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -175,4 +175,7 @@ ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, enum ice_status ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); +bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); +enum ice_status +ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 4db12d1f5808..b2d8a5932b1d 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -838,7 +838,7 @@ static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq) */ static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) { - struct ice_aq_desc *cq_desc = (struct ice_aq_desc *)desc; + struct ice_aq_desc *cq_desc = desc; u16 len; if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) && @@ -868,7 +868,7 @@ static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len) if (buf_len < len) len = buf_len; - ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, (u8 *)buf, len); + ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len); } } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 2a3147ee0bbb..e42727941ef5 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -850,9 +850,9 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode) return ICE_ERR_PARAM; if (dcbx_mode == ICE_DCBX_MODE_IEEE) - dcbx_cfg = &pi->local_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; else if (dcbx_mode == ICE_DCBX_MODE_CEE) - dcbx_cfg = &pi->desired_dcbx_cfg; + dcbx_cfg = 
&pi->qos_cfg.desired_dcbx_cfg; /* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE @@ -863,7 +863,7 @@ ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode) goto out; /* Get Remote DCB Config */ - dcbx_cfg = &pi->remote_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg; ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg); /* Don't treat ENOENT as an error for Remote MIBs */ @@ -892,14 +892,14 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi) ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL); if (!ret) { /* CEE mode */ - dcbx_cfg = &pi->local_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE; dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status); ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg); ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE); } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { /* CEE mode not enabled try querying IEEE data */ - dcbx_cfg = &pi->local_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE; ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE); } @@ -916,26 +916,26 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi) */ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) { - struct ice_port_info *pi = hw->port_info; + struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg; enum ice_status ret = 0; if (!hw->func_caps.common_cap.dcb) return ICE_ERR_NOT_SUPPORTED; - pi->is_sw_lldp = true; + qos_cfg->is_sw_lldp = true; /* Get DCBX status */ - pi->dcbx_status = ice_get_dcbx_status(hw); + qos_cfg->dcbx_status = ice_get_dcbx_status(hw); - if (pi->dcbx_status == ICE_DCBX_STATUS_DONE || - pi->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS || - pi->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { + if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE || + qos_cfg->dcbx_status == 
ICE_DCBX_STATUS_IN_PROGRESS || + qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { /* Get current DCBX configuration */ - ret = ice_get_dcb_cfg(pi); + ret = ice_get_dcb_cfg(hw->port_info); if (ret) return ret; - pi->is_sw_lldp = false; - } else if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) { + qos_cfg->is_sw_lldp = false; + } else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) { return ICE_ERR_NOT_READY; } @@ -943,7 +943,7 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) if (enable_mib_change) { ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL); if (ret) - pi->is_sw_lldp = true; + qos_cfg->is_sw_lldp = true; } return ret; @@ -958,21 +958,21 @@ enum ice_status ice_init_dcb(struct ice_hw *hw, bool enable_mib_change) */ enum ice_status ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib) { - struct ice_port_info *pi = hw->port_info; + struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg; enum ice_status ret; if (!hw->func_caps.common_cap.dcb) return ICE_ERR_NOT_SUPPORTED; /* Get DCBX status */ - pi->dcbx_status = ice_get_dcbx_status(hw); + qos_cfg->dcbx_status = ice_get_dcbx_status(hw); - if (pi->dcbx_status == ICE_DCBX_STATUS_DIS) + if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) return ICE_ERR_NOT_READY; ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL); if (!ret) - pi->is_sw_lldp = !ena_mib; + qos_cfg->is_sw_lldp = !ena_mib; return ret; } @@ -1270,7 +1270,7 @@ enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi) hw = pi->hw; /* update the HW local config */ - dcbcfg = &pi->local_dcbx_cfg; + dcbcfg = &pi->qos_cfg.local_dcbx_cfg; /* Allocate the LLDPDU */ lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL); if (!lldpmib) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 36abd6b7280c..1e8f71ffc8ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -28,7 +28,7 @@ void 
ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) return; - dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; ice_for_each_traffic_class(i) if (vsi->tc_cfg.ena_tc & BIT(i)) @@ -134,7 +134,7 @@ static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host) else mode = DCB_CAP_DCBX_LLD_MANAGED; - if (port_info->local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE) + if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE) return mode | DCB_CAP_DCBX_VER_CEE; else return mode | DCB_CAP_DCBX_VER_IEEE; @@ -277,10 +277,10 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) int ret = ICE_DCB_NO_HW_CHG; struct ice_vsi *pf_vsi; - curr_cfg = &pf->hw.port_info->local_dcbx_cfg; + curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; /* FW does not care if change happened */ - if (!pf->hw.port_info->is_sw_lldp) + if (!pf->hw.port_info->qos_cfg.is_sw_lldp) ret = ICE_DCB_HW_CHG_RST; /* Enable DCB tagging only when more than one TC */ @@ -327,7 +327,7 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) /* Only send new config to HW if we are in SW LLDP mode. Otherwise, * the new config came from the HW in the first place. 
*/ - if (pf->hw.port_info->is_sw_lldp) { + if (pf->hw.port_info->qos_cfg.is_sw_lldp) { ret = ice_set_dcb_cfg(pf->hw.port_info); if (ret) { dev_err(dev, "Set DCB Config failed\n"); @@ -360,7 +360,7 @@ free_cfg: */ static void ice_cfg_etsrec_defaults(struct ice_port_info *pi) { - struct ice_dcbx_cfg *dcbcfg = &pi->local_dcbx_cfg; + struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg; u8 i; /* Ensure ETS recommended DCB configuration is not already set */ @@ -446,7 +446,7 @@ void ice_dcb_rebuild(struct ice_pf *pf) mutex_lock(&pf->tc_mutex); - if (!pf->hw.port_info->is_sw_lldp) + if (!pf->hw.port_info->qos_cfg.is_sw_lldp) ice_cfg_etsrec_defaults(pf->hw.port_info); ret = ice_set_dcb_cfg(pf->hw.port_info); @@ -455,9 +455,9 @@ void ice_dcb_rebuild(struct ice_pf *pf) goto dcb_error; } - if (!pf->hw.port_info->is_sw_lldp) { + if (!pf->hw.port_info->qos_cfg.is_sw_lldp) { ret = ice_cfg_lldp_mib_change(&pf->hw, true); - if (ret && !pf->hw.port_info->is_sw_lldp) { + if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) { dev_err(dev, "Failed to register for MIB changes\n"); goto dcb_error; } @@ -510,11 +510,12 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) int ret = 0; pi = pf->hw.port_info; - newcfg = kmemdup(&pi->local_dcbx_cfg, sizeof(*newcfg), GFP_KERNEL); + newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg), + GFP_KERNEL); if (!newcfg) return -ENOMEM; - memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg)); + memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg)); dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n"); if (ice_pf_dcb_cfg(pf, newcfg, locked)) @@ -545,7 +546,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) if (!dcbcfg) return -ENOMEM; - memset(&pi->local_dcbx_cfg, 0, sizeof(*dcbcfg)); + memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg)); dcbcfg->etscfg.willing = ets_willing ? 
1 : 0; dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc; @@ -608,7 +609,7 @@ static bool ice_dcb_tc_contig(u8 *prio_table) */ static int ice_dcb_noncontig_cfg(struct ice_pf *pf) { - struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; struct device *dev = ice_pf_to_dev(pf); int ret; @@ -638,7 +639,7 @@ static int ice_dcb_noncontig_cfg(struct ice_pf *pf) */ void ice_pf_dcb_recfg(struct ice_pf *pf) { - struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->local_dcbx_cfg; + struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; u8 tc_map = 0; int v, ret; @@ -691,7 +692,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) port_info = hw->port_info; err = ice_init_dcb(hw, false); - if (err && !port_info->is_sw_lldp) { + if (err && !port_info->qos_cfg.is_sw_lldp) { dev_err(dev, "Error initializing DCB %d\n", err); goto dcb_init_err; } @@ -858,7 +859,7 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* Update the remote cached instance and return */ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, - &pi->remote_dcbx_cfg); + &pi->qos_cfg.remote_dcbx_cfg); if (ret) { dev_err(dev, "Failed to get remote DCB config\n"); return; @@ -868,10 +869,11 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, mutex_lock(&pf->tc_mutex); /* store the old configuration */ - tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg; + tmp_dcbx_cfg = pf->hw.port_info->qos_cfg.local_dcbx_cfg; /* Reset the old DCBX configuration data */ - memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg)); + memset(&pi->qos_cfg.local_dcbx_cfg, 0, + sizeof(pi->qos_cfg.local_dcbx_cfg)); /* Get updated DCBX data from firmware */ ret = ice_get_dcb_cfg(pf->hw.port_info); @@ -881,7 +883,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, } /* No change detected in DCBX configs */ - if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, 
sizeof(tmp_dcbx_cfg))) { + if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg, + sizeof(tmp_dcbx_cfg))) { dev_dbg(dev, "No change detected in DCBX configuration.\n"); goto out; } @@ -889,13 +892,13 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, pf->dcbx_cap = ice_dcb_get_mode(pi, false); need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, - &pi->local_dcbx_cfg); - ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->local_dcbx_cfg); + &pi->qos_cfg.local_dcbx_cfg); + ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg); if (!need_reconfig) goto out; /* Enable DCB tagging only when more than one TC */ - if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { + if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) { dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 87f91b750d59..fcfefad00d1c 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -34,12 +34,10 @@ static void ice_dcbnl_devreset(struct net_device *netdev) static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets) { struct ice_dcbx_cfg *dcbxcfg; - struct ice_port_info *pi; struct ice_pf *pf; pf = ice_netdev_to_pf(netdev); - pi = pf->hw.port_info; - dcbxcfg = &pi->local_dcbx_cfg; + dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; ets->willing = dcbxcfg->etscfg.willing; ets->ets_cap = dcbxcfg->etscfg.maxtcs; @@ -74,7 +72,7 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) return -EINVAL; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; mutex_lock(&pf->tc_mutex); @@ -159,6 +157,7 @@ static u8 ice_dcbnl_getdcbx(struct net_device *netdev) static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) { struct ice_pf *pf = 
ice_netdev_to_pf(netdev); + struct ice_qos_cfg *qos_cfg; /* No support for LLD_MANAGED modes or CEE+IEEE */ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || @@ -171,10 +170,11 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) return ICE_DCB_NO_HW_CHG; pf->dcbx_cap = mode; + qos_cfg = &pf->hw.port_info->qos_cfg; if (mode & DCB_CAP_DCBX_VER_CEE) - pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; + qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; else - pf->hw.port_info->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; + qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); return ICE_DCB_HW_CHG_RST; @@ -225,7 +225,7 @@ static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc) struct ice_dcbx_cfg *dcbxcfg; int i; - dcbxcfg = &pi->local_dcbx_cfg; + dcbxcfg = &pi->qos_cfg.local_dcbx_cfg; pfc->pfc_cap = dcbxcfg->pfc.pfccap; pfc->pfc_en = dcbxcfg->pfc.pfcena; pfc->mbc = dcbxcfg->pfc.mbc; @@ -256,7 +256,7 @@ static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc) mutex_lock(&pf->tc_mutex); - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; if (pfc->pfc_cap) new_cfg->pfc.pfccap = pfc->pfc_cap; @@ -293,9 +293,9 @@ ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting) if (prio >= ICE_MAX_USER_PRIORITY) return; - *setting = (pi->local_dcbx_cfg.pfc.pfcena >> prio) & 0x1; + *setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1; dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n", - prio, *setting, pi->local_dcbx_cfg.pfc.pfcena); + prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena); } /** @@ -316,7 +316,7 @@ static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set) if (prio >= ICE_MAX_USER_PRIORITY) return; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; 
new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc; if (set) @@ -338,7 +338,7 @@ static u8 ice_dcbnl_getpfcstate(struct net_device *netdev) struct ice_port_info *pi = pf->hw.port_info; /* Return enabled if any UP enabled for PFC */ - if (pi->local_dcbx_cfg.pfc.pfcena) + if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena) return 1; return 0; @@ -378,8 +378,8 @@ static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state) if (state) { set_bit(ICE_FLAG_DCB_ENA, pf->flags); - memcpy(&pf->hw.port_info->desired_dcbx_cfg, - &pf->hw.port_info->local_dcbx_cfg, + memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg, + &pf->hw.port_info->qos_cfg.local_dcbx_cfg, sizeof(struct ice_dcbx_cfg)); } else { clear_bit(ICE_FLAG_DCB_ENA, pf->flags); @@ -413,7 +413,7 @@ ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio, if (prio >= ICE_MAX_USER_PRIORITY) return; - *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; + *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio]; dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio, *pgid); } @@ -444,7 +444,7 @@ ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, if (tc >= ICE_MAX_TRAFFIC_CLASS) return; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; /* prio_type, bwg_id and bw_pct per UP are not supported */ @@ -474,7 +474,7 @@ ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct) if (pgid >= ICE_MAX_TRAFFIC_CLASS) return; - *bw_pct = pi->local_dcbx_cfg.etscfg.tcbwtable[pgid]; + *bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid]; dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n", pgid, *bw_pct); } @@ -498,7 +498,7 @@ ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct) if (pgid >= ICE_MAX_TRAFFIC_CLASS) return; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; new_cfg->etscfg.tcbwtable[pgid] = bw_pct; } @@ -528,7 +528,7 @@ 
ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio, if (prio >= ICE_MAX_USER_PRIORITY) return; - *pgid = pi->local_dcbx_cfg.etscfg.prio_table[prio]; + *pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio]; } /** @@ -699,9 +699,9 @@ static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app) mutex_lock(&pf->tc_mutex); - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; - old_cfg = &pf->hw.port_info->local_dcbx_cfg; + old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; if (old_cfg->numapps == ICE_DCBX_MAX_APPS) { ret = -EINVAL; @@ -751,7 +751,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) return -EINVAL; mutex_lock(&pf->tc_mutex); - old_cfg = &pf->hw.port_info->local_dcbx_cfg; + old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; if (old_cfg->numapps <= 1) goto delapp_out; @@ -760,7 +760,7 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) if (ret) goto delapp_out; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; for (i = 1; i < new_cfg->numapps; i++) { if (app->selector == new_cfg->app[i].selector && @@ -813,7 +813,7 @@ static u8 ice_dcbnl_cee_set_all(struct net_device *netdev) !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) return ICE_DCB_NO_HW_CHG; - new_cfg = &pf->hw.port_info->desired_dcbx_cfg; + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; mutex_lock(&pf->tc_mutex); @@ -884,7 +884,7 @@ void ice_dcbnl_set_all(struct ice_vsi *vsi) if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags)) return; - dcbxcfg = &pi->local_dcbx_cfg; + dcbxcfg = &pi->qos_cfg.local_dcbx_cfg; for (i = 0; i < dcbxcfg->numapps; i++) { u8 prio, tc_map; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e01b7e34da5e..5636c9b23896 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ 
-1242,6 +1242,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) enum ice_status status; bool dcbx_agent_status; + /* Remove rule to direct LLDP packets to default VSI. + * The FW LLDP engine will now be consuming them. + */ + ice_cfg_sw_lldp(vsi, false, false); + /* AQ command to start FW LLDP agent will return an * error if the agent is already started */ @@ -1270,11 +1275,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) if (status) dev_dbg(dev, "Fail to init DCB\n"); - /* Remove rule to direct LLDP packets to default VSI. - * The FW LLDP engine will now be consuming them. - */ - ice_cfg_sw_lldp(vsi, false, false); - /* Register for MIB change events */ status = ice_cfg_lldp_mib_change(&pf->hw, true); if (status) @@ -2979,7 +2979,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = 0; pause->tx_pause = 0; - dcbx_cfg = &pi->local_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) @@ -3031,7 +3031,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pi = vsi->port_info; hw_link_info = &pi->phy.link_info; - dcbx_cfg = &pi->local_dcbx_cfg; + dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; link_up = hw_link_info->link_info & ICE_AQ_LINK_UP; /* Changing the port's flow control is not supported if this isn't the diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index cf5b717b9293..5e1fd30c0a0f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -2727,7 +2727,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) case ICE_SID_XLT1_RSS: case ICE_SID_XLT1_ACL: case ICE_SID_XLT1_PE: - xlt1 = (struct ice_xlt1_section *)sect; + xlt1 = sect; src = xlt1->value; sect_len = le16_to_cpu(xlt1->count) * sizeof(*hw->blk[block_id].xlt1.t); @@ -2740,7 +2740,7 @@ 
static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) case ICE_SID_XLT2_RSS: case ICE_SID_XLT2_ACL: case ICE_SID_XLT2_PE: - xlt2 = (struct ice_xlt2_section *)sect; + xlt2 = sect; src = (__force u8 *)xlt2->value; sect_len = le16_to_cpu(xlt2->count) * sizeof(*hw->blk[block_id].xlt2.t); @@ -2753,7 +2753,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) case ICE_SID_PROFID_TCAM_RSS: case ICE_SID_PROFID_TCAM_ACL: case ICE_SID_PROFID_TCAM_PE: - pid = (struct ice_prof_id_section *)sect; + pid = sect; src = (u8 *)pid->entry; sect_len = le16_to_cpu(pid->count) * sizeof(*hw->blk[block_id].prof.t); @@ -2766,7 +2766,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) case ICE_SID_PROFID_REDIR_RSS: case ICE_SID_PROFID_REDIR_ACL: case ICE_SID_PROFID_REDIR_PE: - pr = (struct ice_prof_redir_section *)sect; + pr = sect; src = pr->redir_value; sect_len = le16_to_cpu(pr->count) * sizeof(*hw->blk[block_id].prof_redir.t); @@ -2779,7 +2779,7 @@ static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid) case ICE_SID_FLD_VEC_RSS: case ICE_SID_FLD_VEC_ACL: case ICE_SID_FLD_VEC_PE: - es = (struct ice_sw_fv_section *)sect; + es = sect; src = (u8 *)es->fv; sect_len = (u32)(le16_to_cpu(es->count) * hw->blk[block_id].es.fvw) * diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 90abc8612a6a..093a1818a392 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -86,6 +86,9 @@ #define QRXFLXP_CNTXT_RXDID_PRIO_S 8 #define QRXFLXP_CNTXT_RXDID_PRIO_M ICE_M(0x7, 8) #define QRXFLXP_CNTXT_TS_M BIT(11) +#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S 4 +#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M ICE_M(0x3, 4) +#define GLGEN_CLKSTAT_SRC 0x000B826C #define GLGEN_RSTAT 0x000B8188 #define GLGEN_RSTAT_DEVSTATE_M ICE_M(0x3, 0) #define GLGEN_RSTCTL 0x000B8180 diff --git 
a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c new file mode 100644 index 000000000000..4599fc3b4ed8 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2021, Intel Corporation. */ + +/* Link Aggregation code */ + +#include "ice.h" +#include "ice_lag.h" + +/** + * ice_lag_nop_handler - no-op Rx handler to disable LAG + * @pskb: pointer to skb pointer + */ +rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb) +{ + return RX_HANDLER_PASS; +} + +/** + * ice_lag_set_primary - set PF LAG state as Primary + * @lag: LAG info struct + */ +static void ice_lag_set_primary(struct ice_lag *lag) +{ + struct ice_pf *pf = lag->pf; + + if (!pf) + return; + + if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) { + dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n", + netdev_name(lag->netdev)); + return; + } + + lag->role = ICE_LAG_PRIMARY; +} + +/** + * ice_lag_set_backup - set PF LAG state to Backup + * @lag: LAG info struct + */ +static void ice_lag_set_backup(struct ice_lag *lag) +{ + struct ice_pf *pf = lag->pf; + + if (!pf) + return; + + if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) { + dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n", + netdev_name(lag->netdev)); + return; + } + + lag->role = ICE_LAG_BACKUP; +} + +/** + * ice_display_lag_info - print LAG info + * @lag: LAG info struct + */ +static void ice_display_lag_info(struct ice_lag *lag) +{ + const char *name, *peer, *upper, *role, *bonded, *master; + struct device *dev = &lag->pf->pdev->dev; + + name = lag->netdev ? netdev_name(lag->netdev) : "unset"; + peer = lag->peer_netdev ? netdev_name(lag->peer_netdev) : "unset"; + upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset"; + master = lag->master ? "TRUE" : "FALSE"; + bonded = lag->bonded ? 
"BONDED" : "UNBONDED"; + + switch (lag->role) { + case ICE_LAG_NONE: + role = "NONE"; + break; + case ICE_LAG_PRIMARY: + role = "PRIMARY"; + break; + case ICE_LAG_BACKUP: + role = "BACKUP"; + break; + case ICE_LAG_UNSET: + role = "UNSET"; + break; + default: + role = "ERROR"; + } + + dev_dbg(dev, "%s %s, peer:%s, upper:%s, role:%s, master:%s\n", name, + bonded, peer, upper, role, master); +} + +/** + * ice_lag_info_event - handle NETDEV_BONDING_INFO event + * @lag: LAG info struct + * @ptr: opaque data pointer + * + * ptr is to be cast to (netdev_notifier_bonding_info *) + */ +static void ice_lag_info_event(struct ice_lag *lag, void *ptr) +{ + struct net_device *event_netdev, *netdev_tmp; + struct netdev_notifier_bonding_info *info; + struct netdev_bonding_info *bonding_info; + const char *lag_netdev_name; + + event_netdev = netdev_notifier_info_to_dev(ptr); + info = ptr; + lag_netdev_name = netdev_name(lag->netdev); + bonding_info = &info->bonding_info; + + if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev) + return; + + if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) { + netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n"); + goto lag_out; + } + + if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) { + netdev_dbg(lag->netdev, "Bonding event recv, but slave info not for us\n"); + goto lag_out; + } + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) { + if (!netif_is_ice(netdev_tmp)) + continue; + + if (netdev_tmp && netdev_tmp != lag->netdev && + lag->peer_netdev != netdev_tmp) { + dev_hold(netdev_tmp); + lag->peer_netdev = netdev_tmp; + } + } + rcu_read_unlock(); + + if (bonding_info->slave.state) + ice_lag_set_backup(lag); + else + ice_lag_set_primary(lag); + +lag_out: + ice_display_lag_info(lag); +} + +/** + * ice_lag_link - handle LAG link event + * @lag: LAG info struct + * @info: info from the netdev notifier + */ +static void +ice_lag_link(struct ice_lag *lag, 
struct netdev_notifier_changeupper_info *info) +{ + struct net_device *netdev_tmp, *upper = info->upper_dev; + struct ice_pf *pf = lag->pf; + int peers = 0; + + if (lag->bonded) + dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n", + netdev_name(lag->netdev)); + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, netdev_tmp) + peers++; + rcu_read_unlock(); + + if (lag->upper_netdev != upper) { + dev_hold(upper); + lag->upper_netdev = upper; + } + + ice_clear_sriov_cap(pf); + + lag->bonded = true; + lag->role = ICE_LAG_UNSET; + + /* if this is the first element in an LAG mark as master */ + lag->master = !!(peers == 1); +} + +/** + * ice_lag_unlink - handle unlink event + * @lag: LAG info struct + * @info: info from netdev notification + */ +static void +ice_lag_unlink(struct ice_lag *lag, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *netdev_tmp, *upper = info->upper_dev; + struct ice_pf *pf = lag->pf; + bool found = false; + + if (!lag->bonded) { + netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n"); + return; + } + + /* determine if we are in the new LAG config or not */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, netdev_tmp) { + if (netdev_tmp == lag->netdev) { + found = true; + break; + } + } + rcu_read_unlock(); + + if (found) + return; + + if (lag->upper_netdev) { + dev_put(lag->upper_netdev); + lag->upper_netdev = NULL; + } + + if (lag->peer_netdev) { + dev_put(lag->peer_netdev); + lag->peer_netdev = NULL; + } + + ice_set_sriov_cap(pf); + lag->bonded = false; + lag->role = ICE_LAG_NONE; +} + +/** + * ice_lag_changeupper_event - handle LAG changeupper event + * @lag: LAG info struct + * @ptr: opaque pointer data + * + * ptr is to be cast into netdev_notifier_changeupper_info + */ +static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_changeupper_info *info; + struct net_device *netdev; + + info = ptr; + netdev = netdev_notifier_info_to_dev(ptr); + + /* 
not for this netdev */ + if (netdev != lag->netdev) + return; + + if (!info->upper_dev) { + netdev_dbg(netdev, "changeupper rcvd, but no upper defined\n"); + return; + } + + netdev_dbg(netdev, "bonding %s\n", info->linking ? "LINK" : "UNLINK"); + + if (!netif_is_lag_master(info->upper_dev)) { + netdev_dbg(netdev, "changeupper rcvd, but not master. bail\n"); + return; + } + + if (info->linking) + ice_lag_link(lag, info); + else + ice_lag_unlink(lag, info); + + ice_display_lag_info(lag); +} + +/** + * ice_lag_changelower_event - handle LAG changelower event + * @lag: LAG info struct + * @ptr: opaque data pointer + * + * ptr to be cast to netdev_notifier_changelowerstate_info + */ +static void ice_lag_changelower_event(struct ice_lag *lag, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (netdev != lag->netdev) + return; + + netdev_dbg(netdev, "bonding info\n"); + + if (!netif_is_lag_port(netdev)) + netdev_dbg(netdev, "CHANGELOWER rcvd, but netdev not in LAG. 
Bail\n"); +} + +/** + * ice_lag_event_handler - handle LAG events from netdev + * @notif_blk: notifier block registered by this netdev + * @event: event type + * @ptr: opaque data containing notifier event + */ +static int +ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, + void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct ice_lag *lag; + + lag = container_of(notif_blk, struct ice_lag, notif_block); + + if (!lag->netdev) + return NOTIFY_DONE; + + /* Check that the netdev is in the working namespace */ + if (!net_eq(dev_net(netdev), &init_net)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + ice_lag_changeupper_event(lag, ptr); + break; + case NETDEV_CHANGELOWERSTATE: + ice_lag_changelower_event(lag, ptr); + break; + case NETDEV_BONDING_INFO: + ice_lag_info_event(lag, ptr); + break; + default: + break; + } + + return NOTIFY_DONE; +} + +/** + * ice_register_lag_handler - register LAG handler on netdev + * @lag: LAG struct + */ +static int ice_register_lag_handler(struct ice_lag *lag) +{ + struct device *dev = ice_pf_to_dev(lag->pf); + struct notifier_block *notif_blk; + + notif_blk = &lag->notif_block; + + if (!notif_blk->notifier_call) { + notif_blk->notifier_call = ice_lag_event_handler; + if (register_netdevice_notifier(notif_blk)) { + notif_blk->notifier_call = NULL; + dev_err(dev, "FAIL register LAG event handler!\n"); + return -EINVAL; + } + dev_dbg(dev, "LAG event handler registered\n"); + } + return 0; +} + +/** + * ice_unregister_lag_handler - unregister LAG handler on netdev + * @lag: LAG struct + */ +static void ice_unregister_lag_handler(struct ice_lag *lag) +{ + struct device *dev = ice_pf_to_dev(lag->pf); + struct notifier_block *notif_blk; + + notif_blk = &lag->notif_block; + if (notif_blk->notifier_call) { + unregister_netdevice_notifier(notif_blk); + dev_dbg(dev, "LAG event handler unregistered\n"); + } +} + +/** + * ice_init_lag - initialize support for LAG + * 
@pf: PF struct + * + * Alloc memory for LAG structs and initialize the elements. + * Memory will be freed in ice_deinit_lag + */ +int ice_init_lag(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_lag *lag; + struct ice_vsi *vsi; + int err; + + pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL); + if (!pf->lag) + return -ENOMEM; + lag = pf->lag; + + vsi = ice_get_main_vsi(pf); + if (!vsi) { + dev_err(dev, "couldn't get main vsi, link aggregation init fail\n"); + err = -EIO; + goto lag_error; + } + + lag->pf = pf; + lag->netdev = vsi->netdev; + lag->role = ICE_LAG_NONE; + lag->bonded = false; + lag->peer_netdev = NULL; + lag->upper_netdev = NULL; + lag->notif_block.notifier_call = NULL; + + err = ice_register_lag_handler(lag); + if (err) { + dev_warn(dev, "INIT LAG: Failed to register event handler\n"); + goto lag_error; + } + + ice_display_lag_info(lag); + + dev_dbg(dev, "INIT LAG complete\n"); + return 0; + +lag_error: + kfree(lag); + pf->lag = NULL; + return err; +} + +/** + * ice_deinit_lag - Clean up LAG + * @pf: PF struct + * + * Clean up kernel LAG info and free memory + * This function is meant to only be called on driver remove/shutdown + */ +void ice_deinit_lag(struct ice_pf *pf) +{ + struct ice_lag *lag; + + lag = pf->lag; + + if (!lag) + return; + + if (lag->pf) + ice_unregister_lag_handler(lag); + + if (lag->upper_netdev) + dev_put(lag->upper_netdev); + + if (lag->peer_netdev) + dev_put(lag->peer_netdev); + + kfree(lag); + + pf->lag = NULL; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h new file mode 100644 index 000000000000..c2e3688dd8fd --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2021, Intel Corporation. 
*/ + +#ifndef _ICE_LAG_H_ +#define _ICE_LAG_H_ + +#include <linux/netdevice.h> + +/* LAG roles for netdev */ +enum ice_lag_role { + ICE_LAG_NONE, + ICE_LAG_PRIMARY, + ICE_LAG_BACKUP, + ICE_LAG_UNSET +}; + +struct ice_pf; + +/* LAG info struct */ +struct ice_lag { + struct ice_pf *pf; /* backlink to PF struct */ + struct net_device *netdev; /* this PF's netdev */ + struct net_device *peer_netdev; + struct net_device *upper_netdev; /* upper bonding netdev */ + struct notifier_block notif_block; + u8 bonded:1; /* currently bonded */ + u8 master:1; /* this is a master */ + u8 handler:1; /* did we register a rx_netdev_handler */ + /* each thing blocking bonding will increment this value by one. + * If this value is zero, then bonding is allowed. + */ + u16 dis_lag; + u8 role; +}; + +int ice_init_lag(struct ice_pf *pf); +void ice_deinit_lag(struct ice_pf *pf); +rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb); + +/** + * ice_disable_lag - increment LAG disable count + * @lag: LAG struct + */ +static inline void ice_disable_lag(struct ice_lag *lag) +{ + /* If LAG on this PF is not already disabled, disable it */ + rtnl_lock(); + if (!netdev_is_rx_handler_busy(lag->netdev)) { + if (!netdev_rx_handler_register(lag->netdev, + ice_lag_nop_handler, + NULL)) + lag->handler = true; + } + rtnl_unlock(); + lag->dis_lag++; +} + +/** + * ice_enable_lag - decrement disable count for a PF + * @lag: LAG struct + * + * Decrement the disable counter for a port, and if that count reaches + * zero, then remove the no-op Rx handler from that netdev + */ +static inline void ice_enable_lag(struct ice_lag *lag) +{ + if (lag->dis_lag) + lag->dis_lag--; + if (!lag->dis_lag && lag->handler) { + rtnl_lock(); + netdev_rx_handler_unregister(lag->netdev); + rtnl_unlock(); + lag->handler = false; + } +} + +/** + * ice_is_lag_dis - is LAG disabled + * @lag: LAG struct + * + * Return true if bonding is disabled + */ +static inline bool ice_is_lag_dis(struct ice_lag *lag) +{ + return
!!(lag->dis_lag); +} +#endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index ad9c22a1b97a..8d4e2ad4328d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2078,7 +2078,7 @@ err_out: static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) { - struct ice_dcbx_cfg *cfg = &vsi->port_info->local_dcbx_cfg; + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); @@ -2145,11 +2145,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) dev = ice_pf_to_dev(pf); eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth; - if (tx) + if (tx) { status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX, ICE_DROP_PACKET); - else - status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI); + } else { + if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) { + status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num, + create); + } else { + status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, + ICE_FWD_TO_VSI); + } + } if (status) dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", @@ -2158,6 +2165,126 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) } /** + * ice_set_agg_vsi - sets up scheduler aggregator node and move VSI into it + * @vsi: pointer to the VSI + * + * This function will allocate new scheduler aggregator node if needed and will + * move specified VSI into it.
+ */ +static void ice_set_agg_vsi(struct ice_vsi *vsi) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_agg_node *agg_node_iter = NULL; + u32 agg_id = ICE_INVALID_AGG_NODE_ID; + struct ice_agg_node *agg_node = NULL; + int node_offset, max_agg_nodes = 0; + struct ice_port_info *port_info; + struct ice_pf *pf = vsi->back; + u32 agg_node_id_start = 0; + enum ice_status status; + + /* create (as needed) scheduler aggregator node and move VSI into + * corresponding aggregator node + * - PF aggregator node will contain VSIs of type _PF and _CTRL + * - VF aggregator nodes will contain VF VSI + */ + port_info = pf->hw.port_info; + if (!port_info) + return; + + switch (vsi->type) { + case ICE_VSI_CTRL: + case ICE_VSI_LB: + case ICE_VSI_PF: + max_agg_nodes = ICE_MAX_PF_AGG_NODES; + agg_node_id_start = ICE_PF_AGG_NODE_ID_START; + agg_node_iter = &pf->pf_agg_node[0]; + break; + case ICE_VSI_VF: + /* user can create 'n' VFs on a given PF, but since max children + * per aggregator node can be only 64.
Following code handles + * aggregator(s) for VF VSIs, either selects an agg_node which + * was already created provided num_vsis < 64, otherwise + * select next available node, which will be created + */ + max_agg_nodes = ICE_MAX_VF_AGG_NODES; + agg_node_id_start = ICE_VF_AGG_NODE_ID_START; + agg_node_iter = &pf->vf_agg_node[0]; + break; + default: + /* other VSI type, handle later if needed */ + dev_dbg(dev, "unexpected VSI type %s\n", + ice_vsi_type_str(vsi->type)); + return; + } + + /* find the appropriate aggregator node */ + for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) { + /* see if we can find space in previously created + * node if num_vsis < 64, otherwise skip + */ + if (agg_node_iter->num_vsis && + agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) { + agg_node_iter++; + continue; + } + + if (agg_node_iter->valid && + agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) { + agg_id = agg_node_iter->agg_id; + agg_node = agg_node_iter; + break; + } + + /* find unclaimed agg_id */ + if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) { + agg_id = node_offset + agg_node_id_start; + agg_node = agg_node_iter; + break; + } + /* move to next agg_node */ + agg_node_iter++; + } + + if (!agg_node) + return; + + /* if selected aggregator node was not created, create it */ + if (!agg_node->valid) { + status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG, + (u8)vsi->tc_cfg.ena_tc); + if (status) { + dev_err(dev, "unable to create aggregator node with agg_id %u\n", + agg_id); + return; + } + /* aggregator node is created, store the needed info */ + agg_node->valid = true; + agg_node->agg_id = agg_id; + } + + /* move VSI to corresponding aggregator node */ + status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx, + (u8)vsi->tc_cfg.ena_tc); + if (status) { + dev_err(dev, "unable to move VSI idx %u into aggregator %u node", + vsi->idx, agg_id); + return; + } + + /* keep active children count for aggregator node */ + agg_node->num_vsis++; + + /*
cache the 'agg_id' in VSI, so that after reset - VSI will be moved + * to aggregator node + */ + vsi->agg_node = agg_node; + dev_dbg(dev, "successfully moved VSI idx %u tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n", + vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id, + vsi->agg_node->num_vsis); +} + +/** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure * @pi: pointer to the port_info instance @@ -2327,6 +2454,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_cfg_sw_lldp(vsi, true, true); } + if (!vsi->agg_node) + ice_set_agg_vsi(vsi); return vsi; unroll_clear_rings: @@ -2342,6 +2471,8 @@ unroll_vsi_init: unroll_get_qs: ice_vsi_put_qs(vsi); unroll_vsi_alloc: + if (vsi_type == ICE_VSI_VF) + ice_enable_lag(pf->lag); ice_vsi_clear(vsi); return NULL; @@ -2669,6 +2800,9 @@ int ice_vsi_release(struct ice_vsi *vsi) vsi->netdev = NULL; } + if (vsi->type == ICE_VSI_VF && + vsi->agg_node && vsi->agg_node->valid) + vsi->agg_node->num_vsis--; ice_vsi_clear_rings(vsi); ice_vsi_put_qs(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 98cd44a3ccf7..813ec6b8ac23 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -44,6 +44,11 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); static void ice_vsi_release_all(struct ice_pf *pf); +bool netif_is_ice(struct net_device *dev) +{ + return dev && (dev->netdev_ops == &ice_netdev_ops); +} + /** * ice_get_tx_pending - returns number of Tx descriptors not processed * @ring: the ring of descriptors @@ -430,11 +435,19 @@ static void ice_sync_fltr_subtask(struct ice_pf *pf) */ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) { + int node; int v; ice_for_each_vsi(pf, v) if (pf->vsi[v]) ice_dis_vsi(pf->vsi[v], locked); + + for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++) + pf->pf_agg_node[node].num_vsis = 0; + + for (node = 
0; node < ICE_MAX_VF_AGG_NODES; node++) + pf->vf_agg_node[node].num_vsis = 0; + } /** @@ -2476,6 +2489,22 @@ free_qmap: } /** + * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI + * @vsi: VSI to schedule napi on + */ +static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) +{ + int i; + + ice_for_each_rxq(vsi, i) { + struct ice_ring *rx_ring = vsi->rx_rings[i]; + + if (rx_ring->xsk_pool) + napi_schedule(&rx_ring->q_vector->napi); + } +} + +/** * ice_xdp_setup_prog - Add or remove XDP eBPF program * @vsi: VSI to setup XDP for * @prog: XDP program @@ -2519,16 +2548,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (if_running) ret = ice_up(vsi); - if (!ret && prog && vsi->xsk_pools) { - int i; - - ice_for_each_rxq(vsi, i) { - struct ice_ring *rx_ring = vsi->rx_rings[i]; - - if (rx_ring->xsk_pool) - napi_schedule(&rx_ring->q_vector->napi); - } - } + if (!ret && prog) + ice_vsi_rx_napi_schedule(vsi); return (ret || xdp_ring_err) ? -ENOMEM : 0; } @@ -3370,28 +3391,20 @@ static int ice_init_pf(struct ice_pf *pf) */ static int ice_ena_msix_range(struct ice_pf *pf) { + int v_left, v_actual, v_other, v_budget = 0; struct device *dev = ice_pf_to_dev(pf); - int v_left, v_actual, v_budget = 0; int needed, err, i; v_left = pf->hw.func_caps.common_cap.num_msix_vectors; - /* reserve one vector for miscellaneous handler */ - needed = 1; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; - - /* reserve vectors for LAN traffic */ - needed = min_t(int, num_online_cpus(), v_left); + /* reserve for LAN miscellaneous handler */ + needed = ICE_MIN_LAN_OICR_MSIX; if (v_left < needed) goto no_hw_vecs_left_err; - pf->num_lan_msix = needed; v_budget += needed; v_left -= needed; - /* reserve one vector for flow director */ + /* reserve for flow director */ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { needed = ICE_FDIR_MSIX; if (v_left < needed) @@ -3400,9 +3413,19 @@ static int ice_ena_msix_range(struct ice_pf *pf) 
v_left -= needed; } + /* total used for non-traffic vectors */ + v_other = v_budget; + + /* reserve vectors for LAN traffic */ + needed = min_t(int, num_online_cpus(), v_left); + if (v_left < needed) + goto no_hw_vecs_left_err; + pf->num_lan_msix = needed; + v_budget += needed; + v_left -= needed; + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); - if (!pf->msix_entries) { err = -ENOMEM; goto exit_err; @@ -3414,7 +3437,6 @@ static int ice_ena_msix_range(struct ice_pf *pf) /* actually reserve the vectors */ v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, ICE_MIN_MSIX, v_budget); - if (v_actual < 0) { dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; @@ -3431,7 +3453,16 @@ static int ice_ena_msix_range(struct ice_pf *pf) err = -ERANGE; goto msix_err; } else { - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + int v_traffic = v_actual - v_other; + + if (v_actual == ICE_MIN_MSIX || + v_traffic < ICE_MIN_LAN_TXRX_MSIX) + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + else + pf->num_lan_msix = v_traffic; + + dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); } } @@ -4227,6 +4258,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_cfg_lldp_mib_change(&pf->hw, true); } + if (ice_init_lag(pf)) + dev_warn(dev, "Failed to init link aggregation support\n"); + /* print PCI link speed and width */ pcie_print_link_status(pf->pdev); @@ -4349,6 +4383,7 @@ static void ice_remove(struct pci_dev *pdev) ice_aq_cancel_waiting_tasks(pf); mutex_destroy(&(&pf->hw)->fdir_fltr_lock); + ice_deinit_lag(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@ -6152,7 +6187,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) err = ice_down(vsi); if (err) { - netdev_err(netdev, "change MTU if_up err %d\n", err); + netdev_err(netdev, "change MTU if_down err %d\n", err); return err; } diff --git 
a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index f0912e44d4ad..2403cb38b93c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -431,6 +431,27 @@ ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req, } /** + * ice_aq_move_sched_elems - move scheduler elements + * @hw: pointer to the HW struct + * @grps_req: number of groups to move + * @buf: pointer to buffer + * @buf_size: buffer size in bytes + * @grps_movd: returns total number of groups moved + * @cd: pointer to command details structure or NULL + * + * Move scheduling elements (0x0408) + */ +static enum ice_status +ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req, + struct ice_aqc_move_elem *buf, u16 buf_size, + u16 *grps_movd, struct ice_sq_cd *cd) +{ + return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems, + grps_req, (void *)buf, buf_size, + grps_movd, cd); +} + +/** * ice_aq_suspend_sched_elems - suspend scheduler elements * @hw: pointer to the HW struct * @elems_req: number of elements to suspend @@ -1022,6 +1043,28 @@ static u8 ice_sched_get_vsi_layer(struct ice_hw *hw) } /** + * ice_sched_get_agg_layer - get the current aggregator layer number + * @hw: pointer to the HW struct + * + * This function returns the current aggregator layer number + */ +static u8 ice_sched_get_agg_layer(struct ice_hw *hw) +{ + /* Num Layers aggregator layer + * 9 4 + * 7 or less sw_entry_point_layer + */ + /* calculate the aggregator layer based on number of layers. 
*/ + if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) { + u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET; + + if (layer > hw->sw_entry_point_layer) + return layer; + } + return hw->sw_entry_point_layer; +} + +/** * ice_rm_dflt_leaf_node - remove the default leaf node in the tree * @pi: port information structure * @@ -1239,6 +1282,46 @@ sched_query_out: } /** + * ice_sched_get_psm_clk_freq - determine the PSM clock frequency + * @hw: pointer to the HW struct + * + * Determine the PSM clock frequency and store in HW struct + */ +void ice_sched_get_psm_clk_freq(struct ice_hw *hw) +{ + u32 val, clk_src; + + val = rd32(hw, GLGEN_CLKSTAT_SRC); + clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >> + GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S; + +#define PSM_CLK_SRC_367_MHZ 0x0 +#define PSM_CLK_SRC_416_MHZ 0x1 +#define PSM_CLK_SRC_446_MHZ 0x2 +#define PSM_CLK_SRC_390_MHZ 0x3 + + switch (clk_src) { + case PSM_CLK_SRC_367_MHZ: + hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ; + break; + case PSM_CLK_SRC_416_MHZ: + hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ; + break; + case PSM_CLK_SRC_446_MHZ: + hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ; + break; + case PSM_CLK_SRC_390_MHZ: + hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ; + break; + default: + ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n", + clk_src); + /* fall back to a safe default */ + hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ; + } +} + +/** * ice_sched_find_node_in_subtree - Find node in part of base node subtree * @hw: pointer to the HW struct * @base: pointer to the base node @@ -1364,7 +1447,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, /** * ice_sched_get_vsi_node - Get a VSI node based on VSI ID - * @hw: pointer to the HW struct + * @pi: pointer to the port information structure * @tc_node: pointer to the TC node * @vsi_handle: software VSI handle * @@ -1372,14 +1455,14 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, * TC branch 
*/ static struct ice_sched_node * -ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node, +ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node, u16 vsi_handle) { struct ice_sched_node *node; u8 vsi_layer; - vsi_layer = ice_sched_get_vsi_layer(hw); - node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer); + vsi_layer = ice_sched_get_vsi_layer(pi->hw); + node = ice_sched_get_first_node(pi, tc_node, vsi_layer); /* Check whether it already exists */ while (node) { @@ -1392,6 +1475,38 @@ ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node, } /** + * ice_sched_get_agg_node - Get an aggregator node based on aggregator ID + * @pi: pointer to the port information structure + * @tc_node: pointer to the TC node + * @agg_id: aggregator ID + * + * This function retrieves an aggregator node for a given aggregator ID from + * a given TC branch + */ +static struct ice_sched_node * +ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node, + u32 agg_id) +{ + struct ice_sched_node *node; + struct ice_hw *hw = pi->hw; + u8 agg_layer; + + if (!hw) + return NULL; + agg_layer = ice_sched_get_agg_layer(hw); + node = ice_sched_get_first_node(pi, tc_node, agg_layer); + + /* Check whether it already exists */ + while (node) { + if (node->agg_id == agg_id) + return node; + node = node->sibling; + } + + return node; +} + +/** * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes * @hw: pointer to the HW struct * @num_qs: number of queues @@ -1444,7 +1559,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, qgl = ice_sched_get_qgrp_layer(hw); vsil = ice_sched_get_vsi_layer(hw); - parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle); + parent = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); for (i = vsil + 1; i <= qgl; i++) { if (!parent) return ICE_ERR_CFG; @@ -1477,7 +1592,7 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 
vsi_handle, /** * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes - * @hw: pointer to the HW struct + * @pi: pointer to the port info structure * @tc_node: pointer to TC node * @num_nodes: pointer to num nodes array * @@ -1486,15 +1601,15 @@ ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, * layers */ static void -ice_sched_calc_vsi_support_nodes(struct ice_hw *hw, +ice_sched_calc_vsi_support_nodes(struct ice_port_info *pi, struct ice_sched_node *tc_node, u16 *num_nodes) { struct ice_sched_node *node; u8 vsil; int i; - vsil = ice_sched_get_vsi_layer(hw); - for (i = vsil; i >= hw->sw_entry_point_layer; i--) + vsil = ice_sched_get_vsi_layer(pi->hw); + for (i = vsil; i >= pi->hw->sw_entry_point_layer; i--) /* Add intermediate nodes if TC has no children and * need at least one node for VSI */ @@ -1504,11 +1619,10 @@ ice_sched_calc_vsi_support_nodes(struct ice_hw *hw, /* If intermediate nodes are reached max children * then add a new one. */ - node = ice_sched_get_first_node(hw->port_info, tc_node, - (u8)i); + node = ice_sched_get_first_node(pi, tc_node, (u8)i); /* scan all the siblings */ while (node) { - if (node->num_children < hw->max_children[i]) + if (node->num_children < pi->hw->max_children[i]) break; node = node->sibling; } @@ -1588,14 +1702,13 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc) { u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; struct ice_sched_node *tc_node; - struct ice_hw *hw = pi->hw; tc_node = ice_sched_get_tc_node(pi, tc); if (!tc_node) return ICE_ERR_PARAM; /* calculate number of supported nodes needed for this VSI */ - ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes); + ice_sched_calc_vsi_support_nodes(pi, tc_node, num_nodes); /* add VSI supported nodes to TC subtree */ return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node, @@ -1628,7 +1741,7 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, if (!tc_node) return 
ICE_ERR_CFG; - vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle); + vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); if (!vsi_node) return ICE_ERR_CFG; @@ -1691,7 +1804,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle); if (!vsi_ctx) return ICE_ERR_PARAM; - vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle); + vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); /* suspend the VSI if TC is not enabled */ if (!enable) { @@ -1712,7 +1825,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, if (status) return status; - vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle); + vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); if (!vsi_node) return ICE_ERR_CFG; @@ -1821,7 +1934,7 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) if (!tc_node) continue; - vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle); + vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); if (!vsi_node) continue; @@ -1874,6 +1987,720 @@ enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle) } /** + * ice_get_agg_info - get the aggregator ID + * @hw: pointer to the hardware structure + * @agg_id: aggregator ID + * + * This function validates aggregator ID. The function returns info if + * aggregator ID is present in list otherwise it returns null. 
+ */ +static struct ice_sched_agg_info * +ice_get_agg_info(struct ice_hw *hw, u32 agg_id) +{ + struct ice_sched_agg_info *agg_info; + + list_for_each_entry(agg_info, &hw->agg_list, list_entry) + if (agg_info->agg_id == agg_id) + return agg_info; + + return NULL; +} + +/** + * ice_sched_get_free_vsi_parent - Find a free parent node in aggregator subtree + * @hw: pointer to the HW struct + * @node: pointer to a child node + * @num_nodes: num nodes count array + * + * This function walks through the aggregator subtree to find a free parent + * node + */ +static struct ice_sched_node * +ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node, + u16 *num_nodes) +{ + u8 l = node->tx_sched_layer; + u8 vsil, i; + + vsil = ice_sched_get_vsi_layer(hw); + + /* Is it VSI parent layer ? */ + if (l == vsil - 1) + return (node->num_children < hw->max_children[l]) ? node : NULL; + + /* We have intermediate nodes. Let's walk through the subtree. If the + * intermediate node has space to add a new node then clear the count + */ + if (node->num_children < hw->max_children[l]) + num_nodes[l] = 0; + /* The below recursive call is intentional and wouldn't go more than + * 2 or 3 iterations. 
+ */ + + for (i = 0; i < node->num_children; i++) { + struct ice_sched_node *parent; + + parent = ice_sched_get_free_vsi_parent(hw, node->children[i], + num_nodes); + if (parent) + return parent; + } + + return NULL; +} + +/** + * ice_sched_update_parent - update the new parent in SW DB + * @new_parent: pointer to a new parent node + * @node: pointer to a child node + * + * This function removes the child from the old parent and adds it to a new + * parent + */ +static void +ice_sched_update_parent(struct ice_sched_node *new_parent, + struct ice_sched_node *node) +{ + struct ice_sched_node *old_parent; + u8 i, j; + + old_parent = node->parent; + + /* update the old parent children */ + for (i = 0; i < old_parent->num_children; i++) + if (old_parent->children[i] == node) { + for (j = i + 1; j < old_parent->num_children; j++) + old_parent->children[j - 1] = + old_parent->children[j]; + old_parent->num_children--; + break; + } + + /* now move the node to a new parent */ + new_parent->children[new_parent->num_children++] = node; + node->parent = new_parent; + node->info.parent_teid = new_parent->info.node_teid; +} + +/** + * ice_sched_move_nodes - move child nodes to a given parent + * @pi: port information structure + * @parent: pointer to parent node + * @num_items: number of child nodes to be moved + * @list: pointer to child node teids + * + * This function move the child nodes to a given parent. 
+ */ +static enum ice_status +ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent, + u16 num_items, u32 *list) +{ + struct ice_aqc_move_elem *buf; + struct ice_sched_node *node; + enum ice_status status = 0; + u16 i, grps_movd = 0; + struct ice_hw *hw; + u16 buf_len; + + hw = pi->hw; + + if (!parent || !num_items) + return ICE_ERR_PARAM; + + /* Does parent have enough space */ + if (parent->num_children + num_items > + hw->max_children[parent->tx_sched_layer]) + return ICE_ERR_AQ_FULL; + + buf_len = struct_size(buf, teid, 1); + buf = kzalloc(buf_len, GFP_KERNEL); + if (!buf) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < num_items; i++) { + node = ice_sched_find_node_by_teid(pi->root, list[i]); + if (!node) { + status = ICE_ERR_PARAM; + goto move_err_exit; + } + + buf->hdr.src_parent_teid = node->info.parent_teid; + buf->hdr.dest_parent_teid = parent->info.node_teid; + buf->teid[0] = node->info.node_teid; + buf->hdr.num_elems = cpu_to_le16(1); + status = ice_aq_move_sched_elems(hw, 1, buf, buf_len, + &grps_movd, NULL); + if (status && grps_movd != 1) { + status = ICE_ERR_CFG; + goto move_err_exit; + } + + /* update the SW DB */ + ice_sched_update_parent(parent, node); + } + +move_err_exit: + kfree(buf); + return status; +} + +/** + * ice_sched_move_vsi_to_agg - move VSI to aggregator node + * @pi: port information structure + * @vsi_handle: software VSI handle + * @agg_id: aggregator ID + * @tc: TC number + * + * This function moves a VSI to an aggregator node or its subtree. + * Intermediate nodes may be created if required. 
+ */ +static enum ice_status +ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 vsi_handle, u32 agg_id, + u8 tc) +{ + struct ice_sched_node *vsi_node, *agg_node, *tc_node, *parent; + u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + u32 first_node_teid, vsi_teid; + enum ice_status status; + u16 num_nodes_added; + u8 aggl, vsil, i; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_CFG; + + agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id); + if (!agg_node) + return ICE_ERR_DOES_NOT_EXIST; + + vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle); + if (!vsi_node) + return ICE_ERR_DOES_NOT_EXIST; + + /* Is this VSI already part of given aggregator? */ + if (ice_sched_find_node_in_subtree(pi->hw, agg_node, vsi_node)) + return 0; + + aggl = ice_sched_get_agg_layer(pi->hw); + vsil = ice_sched_get_vsi_layer(pi->hw); + + /* set intermediate node count to 1 between aggregator and VSI layers */ + for (i = aggl + 1; i < vsil; i++) + num_nodes[i] = 1; + + /* Check if the aggregator subtree has any free node to add the VSI */ + for (i = 0; i < agg_node->num_children; i++) { + parent = ice_sched_get_free_vsi_parent(pi->hw, + agg_node->children[i], + num_nodes); + if (parent) + goto move_nodes; + } + + /* add new nodes */ + parent = agg_node; + for (i = aggl + 1; i < vsil; i++) { + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i, + num_nodes[i], + &first_node_teid, + &num_nodes_added); + if (status || num_nodes[i] != num_nodes_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_nodes_added) + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + else + parent = parent->children[0]; + + if (!parent) + return ICE_ERR_CFG; + } + +move_nodes: + vsi_teid = le32_to_cpu(vsi_node->info.node_teid); + return ice_sched_move_nodes(pi, parent, 1, &vsi_teid); +} + +/** + * ice_move_all_vsi_to_dflt_agg - move all VSI(s) to default aggregator + * @pi: 
port information structure + * @agg_info: aggregator info + * @tc: traffic class number + * @rm_vsi_info: true or false + * + * This function move all the VSI(s) to the default aggregator and delete + * aggregator VSI info based on passed in boolean parameter rm_vsi_info. The + * caller holds the scheduler lock. + */ +static enum ice_status +ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi, + struct ice_sched_agg_info *agg_info, u8 tc, + bool rm_vsi_info) +{ + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_sched_agg_vsi_info *tmp; + enum ice_status status = 0; + + list_for_each_entry_safe(agg_vsi_info, tmp, &agg_info->agg_vsi_list, + list_entry) { + u16 vsi_handle = agg_vsi_info->vsi_handle; + + /* Move VSI to default aggregator */ + if (!ice_is_tc_ena(agg_vsi_info->tc_bitmap[0], tc)) + continue; + + status = ice_sched_move_vsi_to_agg(pi, vsi_handle, + ICE_DFLT_AGG_ID, tc); + if (status) + break; + + clear_bit(tc, agg_vsi_info->tc_bitmap); + if (rm_vsi_info && !agg_vsi_info->tc_bitmap[0]) { + list_del(&agg_vsi_info->list_entry); + devm_kfree(ice_hw_to_dev(pi->hw), agg_vsi_info); + } + } + + return status; +} + +/** + * ice_sched_is_agg_inuse - check whether the aggregator is in use or not + * @pi: port information structure + * @node: node pointer + * + * This function checks whether the aggregator is attached with any VSI or not. + */ +static bool +ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node) +{ + u8 vsil, i; + + vsil = ice_sched_get_vsi_layer(pi->hw); + if (node->tx_sched_layer < vsil - 1) { + for (i = 0; i < node->num_children; i++) + if (ice_sched_is_agg_inuse(pi, node->children[i])) + return true; + return false; + } else { + return node->num_children ? 
true : false; + } +} + +/** + * ice_sched_rm_agg_cfg - remove the aggregator node + * @pi: port information structure + * @agg_id: aggregator ID + * @tc: TC number + * + * This function removes the aggregator node and intermediate nodes if any + * from the given TC + */ +static enum ice_status +ice_sched_rm_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc) +{ + struct ice_sched_node *tc_node, *agg_node; + struct ice_hw *hw = pi->hw; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_CFG; + + agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id); + if (!agg_node) + return ICE_ERR_DOES_NOT_EXIST; + + /* Can't remove the aggregator node if it has children */ + if (ice_sched_is_agg_inuse(pi, agg_node)) + return ICE_ERR_IN_USE; + + /* need to remove the whole subtree if aggregator node is the + * only child. + */ + while (agg_node->tx_sched_layer > hw->sw_entry_point_layer) { + struct ice_sched_node *parent = agg_node->parent; + + if (!parent) + return ICE_ERR_CFG; + + if (parent->num_children > 1) + break; + + agg_node = parent; + } + + ice_free_sched_node(pi, agg_node); + return 0; +} + +/** + * ice_rm_agg_cfg_tc - remove aggregator configuration for TC + * @pi: port information structure + * @agg_info: aggregator ID + * @tc: TC number + * @rm_vsi_info: bool value true or false + * + * This function removes aggregator reference to VSI of given TC. It removes + * the aggregator configuration completely for requested TC. The caller needs + * to hold the scheduler lock. 
+ */ +static enum ice_status +ice_rm_agg_cfg_tc(struct ice_port_info *pi, struct ice_sched_agg_info *agg_info, + u8 tc, bool rm_vsi_info) +{ + enum ice_status status = 0; + + /* If nothing to remove - return success */ + if (!ice_is_tc_ena(agg_info->tc_bitmap[0], tc)) + goto exit_rm_agg_cfg_tc; + + status = ice_move_all_vsi_to_dflt_agg(pi, agg_info, tc, rm_vsi_info); + if (status) + goto exit_rm_agg_cfg_tc; + + /* Delete aggregator node(s) */ + status = ice_sched_rm_agg_cfg(pi, agg_info->agg_id, tc); + if (status) + goto exit_rm_agg_cfg_tc; + + clear_bit(tc, agg_info->tc_bitmap); +exit_rm_agg_cfg_tc: + return status; +} + +/** + * ice_save_agg_tc_bitmap - save aggregator TC bitmap + * @pi: port information structure + * @agg_id: aggregator ID + * @tc_bitmap: 8 bits TC bitmap + * + * Save aggregator TC bitmap. This function needs to be called with scheduler + * lock held. + */ +static enum ice_status +ice_save_agg_tc_bitmap(struct ice_port_info *pi, u32 agg_id, + unsigned long *tc_bitmap) +{ + struct ice_sched_agg_info *agg_info; + + agg_info = ice_get_agg_info(pi->hw, agg_id); + if (!agg_info) + return ICE_ERR_PARAM; + bitmap_copy(agg_info->replay_tc_bitmap, tc_bitmap, + ICE_MAX_TRAFFIC_CLASS); + return 0; +} + +/** + * ice_sched_add_agg_cfg - create an aggregator node + * @pi: port information structure + * @agg_id: aggregator ID + * @tc: TC number + * + * This function creates an aggregator node and intermediate nodes if required + * for the given TC + */ +static enum ice_status +ice_sched_add_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc) +{ + struct ice_sched_node *parent, *agg_node, *tc_node; + u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 }; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u32 first_node_teid; + u16 num_nodes_added; + u8 i, aggl; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (!tc_node) + return ICE_ERR_CFG; + + agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id); + /* Does Agg node already exist ? 
*/ + if (agg_node) + return status; + + aggl = ice_sched_get_agg_layer(hw); + + /* need one node in Agg layer */ + num_nodes[aggl] = 1; + + /* Check whether the intermediate nodes have space to add the + * new aggregator. If they are full, then SW needs to allocate a new + * intermediate node on those layers + */ + for (i = hw->sw_entry_point_layer; i < aggl; i++) { + parent = ice_sched_get_first_node(pi, tc_node, i); + + /* scan all the siblings */ + while (parent) { + if (parent->num_children < hw->max_children[i]) + break; + parent = parent->sibling; + } + + /* all the nodes are full, reserve one for this layer */ + if (!parent) + num_nodes[i]++; + } + + /* add the aggregator node */ + parent = tc_node; + for (i = hw->sw_entry_point_layer; i <= aggl; i++) { + if (!parent) + return ICE_ERR_CFG; + + status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i, + num_nodes[i], + &first_node_teid, + &num_nodes_added); + if (status || num_nodes[i] != num_nodes_added) + return ICE_ERR_CFG; + + /* The newly added node can be a new parent for the next + * layer nodes + */ + if (num_nodes_added) { + parent = ice_sched_find_node_by_teid(tc_node, + first_node_teid); + /* register aggregator ID with the aggregator node */ + if (parent && i == aggl) + parent->agg_id = agg_id; + } else { + parent = parent->children[0]; + } + } + + return 0; +} + +/** + * ice_sched_cfg_agg - configure aggregator node + * @pi: port information structure + * @agg_id: aggregator ID + * @agg_type: aggregator type queue, VSI, or aggregator group + * @tc_bitmap: bits TC bitmap + * + * It registers a unique aggregator node into scheduler services. It + * allows a user to register with a unique ID to track it's resources. + * The aggregator type determines if this is a queue group, VSI group + * or aggregator group. It then creates the aggregator node(s) for requested + * TC(s) or removes an existing aggregator node including its configuration + * if indicated via tc_bitmap. 
Call ice_rm_agg_cfg to release aggregator + * resources and remove aggregator ID. + * This function needs to be called with scheduler lock held. + */ +static enum ice_status +ice_sched_cfg_agg(struct ice_port_info *pi, u32 agg_id, + enum ice_agg_type agg_type, unsigned long *tc_bitmap) +{ + struct ice_sched_agg_info *agg_info; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u8 tc; + + agg_info = ice_get_agg_info(hw, agg_id); + if (!agg_info) { + /* Create new entry for new aggregator ID */ + agg_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*agg_info), + GFP_KERNEL); + if (!agg_info) + return ICE_ERR_NO_MEMORY; + + agg_info->agg_id = agg_id; + agg_info->agg_type = agg_type; + agg_info->tc_bitmap[0] = 0; + + /* Initialize the aggregator VSI list head */ + INIT_LIST_HEAD(&agg_info->agg_vsi_list); + + /* Add new entry in aggregator list */ + list_add(&agg_info->list_entry, &hw->agg_list); + } + /* Create aggregator node(s) for requested TC(s) */ + ice_for_each_traffic_class(tc) { + if (!ice_is_tc_ena(*tc_bitmap, tc)) { + /* Delete aggregator cfg TC if it exists previously */ + status = ice_rm_agg_cfg_tc(pi, agg_info, tc, false); + if (status) + break; + continue; + } + + /* Check if aggregator node for TC already exists */ + if (ice_is_tc_ena(agg_info->tc_bitmap[0], tc)) + continue; + + /* Create new aggregator node for TC */ + status = ice_sched_add_agg_cfg(pi, agg_id, tc); + if (status) + break; + + /* Save aggregator node's TC information */ + set_bit(tc, agg_info->tc_bitmap); + } + + return status; +} + +/** + * ice_cfg_agg - config aggregator node + * @pi: port information structure + * @agg_id: aggregator ID + * @agg_type: aggregator type queue, VSI, or aggregator group + * @tc_bitmap: bits TC bitmap + * + * This function configures aggregator node(s). 
+ */ +enum ice_status +ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, enum ice_agg_type agg_type, + u8 tc_bitmap) +{ + unsigned long bitmap = tc_bitmap; + enum ice_status status; + + mutex_lock(&pi->sched_lock); + status = ice_sched_cfg_agg(pi, agg_id, agg_type, + (unsigned long *)&bitmap); + if (!status) + status = ice_save_agg_tc_bitmap(pi, agg_id, + (unsigned long *)&bitmap); + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_get_agg_vsi_info - get the aggregator ID + * @agg_info: aggregator info + * @vsi_handle: software VSI handle + * + * The function returns aggregator VSI info based on VSI handle. This function + * needs to be called with scheduler lock held. + */ +static struct ice_sched_agg_vsi_info * +ice_get_agg_vsi_info(struct ice_sched_agg_info *agg_info, u16 vsi_handle) +{ + struct ice_sched_agg_vsi_info *agg_vsi_info; + + list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, list_entry) + if (agg_vsi_info->vsi_handle == vsi_handle) + return agg_vsi_info; + + return NULL; +} + +/** + * ice_get_vsi_agg_info - get the aggregator info of VSI + * @hw: pointer to the hardware structure + * @vsi_handle: Sw VSI handle + * + * The function returns aggregator info of VSI represented via vsi_handle. The + * VSI has in this case a different aggregator than the default one. This + * function needs to be called with scheduler lock held. 
+ */ +static struct ice_sched_agg_info * +ice_get_vsi_agg_info(struct ice_hw *hw, u16 vsi_handle) +{ + struct ice_sched_agg_info *agg_info; + + list_for_each_entry(agg_info, &hw->agg_list, list_entry) { + struct ice_sched_agg_vsi_info *agg_vsi_info; + + agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle); + if (agg_vsi_info) + return agg_info; + } + return NULL; +} + +/** + * ice_save_agg_vsi_tc_bitmap - save aggregator VSI TC bitmap + * @pi: port information structure + * @agg_id: aggregator ID + * @vsi_handle: software VSI handle + * @tc_bitmap: TC bitmap of enabled TC(s) + * + * Save VSI to aggregator TC bitmap. This function needs to call with scheduler + * lock held. + */ +static enum ice_status +ice_save_agg_vsi_tc_bitmap(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle, + unsigned long *tc_bitmap) +{ + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_sched_agg_info *agg_info; + + agg_info = ice_get_agg_info(pi->hw, agg_id); + if (!agg_info) + return ICE_ERR_PARAM; + /* check if entry already exist */ + agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle); + if (!agg_vsi_info) + return ICE_ERR_PARAM; + bitmap_copy(agg_vsi_info->replay_tc_bitmap, tc_bitmap, + ICE_MAX_TRAFFIC_CLASS); + return 0; +} + +/** + * ice_sched_assoc_vsi_to_agg - associate/move VSI to new/default aggregator + * @pi: port information structure + * @agg_id: aggregator ID + * @vsi_handle: software VSI handle + * @tc_bitmap: TC bitmap of enabled TC(s) + * + * This function moves VSI to a new or default aggregator node. If VSI is + * already associated to the aggregator node then no operation is performed on + * the tree. This function needs to be called with scheduler lock held. 
+ */ +static enum ice_status +ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, + u16 vsi_handle, unsigned long *tc_bitmap) +{ + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_sched_agg_info *agg_info; + enum ice_status status = 0; + struct ice_hw *hw = pi->hw; + u8 tc; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return ICE_ERR_PARAM; + agg_info = ice_get_agg_info(hw, agg_id); + if (!agg_info) + return ICE_ERR_PARAM; + /* check if entry already exist */ + agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle); + if (!agg_vsi_info) { + /* Create new entry for VSI under aggregator list */ + agg_vsi_info = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*agg_vsi_info), GFP_KERNEL); + if (!agg_vsi_info) + return ICE_ERR_PARAM; + + /* add VSI ID into the aggregator list */ + agg_vsi_info->vsi_handle = vsi_handle; + list_add(&agg_vsi_info->list_entry, &agg_info->agg_vsi_list); + } + /* Move VSI node to new aggregator node for requested TC(s) */ + ice_for_each_traffic_class(tc) { + if (!ice_is_tc_ena(*tc_bitmap, tc)) + continue; + + /* Move VSI to new aggregator */ + status = ice_sched_move_vsi_to_agg(pi, vsi_handle, agg_id, tc); + if (status) + break; + + set_bit(tc, agg_vsi_info->tc_bitmap); + } + return status; +} + +/** * ice_sched_rm_unused_rl_prof - remove unused RL profile * @pi: port information structure * @@ -1955,7 +2782,6 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, { struct ice_aqc_txsched_elem_data buf; struct ice_aqc_txsched_elem *data; - enum ice_status status; buf = node->info; data = &buf.data; @@ -1970,7 +2796,32 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, } /* Configure element */ - status = ice_sched_update_elem(hw, node, &buf); + return ice_sched_update_elem(hw, node, &buf); +} + +/** + * ice_move_vsi_to_agg - moves VSI to new or default aggregator + * @pi: port information structure + * @agg_id: aggregator ID + * @vsi_handle: software VSI handle + * 
@tc_bitmap: TC bitmap of enabled TC(s) + * + * Move or associate VSI to a new or default aggregator node. + */ +enum ice_status +ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle, + u8 tc_bitmap) +{ + unsigned long bitmap = tc_bitmap; + enum ice_status status; + + mutex_lock(&pi->sched_lock); + status = ice_sched_assoc_vsi_to_agg(pi, agg_id, vsi_handle, + (unsigned long *)&bitmap); + if (!status) + status = ice_save_agg_vsi_tc_bitmap(pi, agg_id, vsi_handle, + (unsigned long *)&bitmap); + mutex_unlock(&pi->sched_lock); return status; } @@ -2045,11 +2896,12 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) /** * ice_sched_calc_wakeup - calculate RL profile wakeup parameter + * @hw: pointer to the HW struct * @bw: bandwidth in Kbps * * This function calculates the wakeup parameter of RL profile. */ -static u16 ice_sched_calc_wakeup(s32 bw) +static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw) { s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f; s32 wakeup_f_int; @@ -2057,7 +2909,7 @@ static u16 ice_sched_calc_wakeup(s32 bw) /* Get the wakeup integer value */ bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE); - wakeup_int = div64_long(ICE_RL_PROF_FREQUENCY, bytes_per_sec); + wakeup_int = div64_long(hw->psm_clk_freq, bytes_per_sec); if (wakeup_int > 63) { wakeup = (u16)((1 << 15) | wakeup_int); } else { @@ -2066,8 +2918,7 @@ static u16 ice_sched_calc_wakeup(s32 bw) */ wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int; wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER * - ICE_RL_PROF_FREQUENCY, - bytes_per_sec); + hw->psm_clk_freq, bytes_per_sec); /* Get Fraction value */ wakeup_f = wakeup_a - wakeup_b; @@ -2087,13 +2938,15 @@ static u16 ice_sched_calc_wakeup(s32 bw) /** * ice_sched_bw_to_rl_profile - convert BW to profile parameters + * @hw: pointer to the HW struct * @bw: bandwidth in Kbps * @profile: profile parameters to return * * This function converts the BW to profile structure 
format. */ static enum ice_status -ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) +ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw, + struct ice_aqc_rl_profile_elem *profile) { enum ice_status status = ICE_ERR_PARAM; s64 bytes_per_sec, ts_rate, mv_tmp; @@ -2113,7 +2966,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) for (i = 0; i < 64; i++) { u64 pow_result = BIT_ULL(i); - ts_rate = div64_long((s64)ICE_RL_PROF_FREQUENCY, + ts_rate = div64_long((s64)hw->psm_clk_freq, pow_result * ICE_RL_PROF_TS_MULTIPLIER); if (ts_rate <= 0) continue; @@ -2137,7 +2990,7 @@ ice_sched_bw_to_rl_profile(u32 bw, struct ice_aqc_rl_profile_elem *profile) if (found) { u16 wm; - wm = ice_sched_calc_wakeup(bw); + wm = ice_sched_calc_wakeup(hw, bw); profile->rl_multiply = cpu_to_le16(mv); profile->wake_up_calc = cpu_to_le16(wm); profile->rl_encode = cpu_to_le16(encode); @@ -2206,7 +3059,7 @@ ice_sched_add_rl_profile(struct ice_port_info *pi, if (!rl_prof_elem) return NULL; - status = ice_sched_bw_to_rl_profile(bw, &rl_prof_elem->profile); + status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile); if (status) goto exit_add_rl_prof; @@ -2941,6 +3794,156 @@ ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node, } /** + * ice_sched_get_ena_tc_bitmap - get enabled TC bitmap + * @pi: port info struct + * @tc_bitmap: 8 bits TC bitmap to check + * @ena_tc_bitmap: 8 bits enabled TC bitmap to return + * + * This function returns enabled TC bitmap in variable ena_tc_bitmap. Some TCs + * may be missing, it returns enabled TCs. This function needs to be called with + * scheduler lock held. 
+ */ +static void +ice_sched_get_ena_tc_bitmap(struct ice_port_info *pi, + unsigned long *tc_bitmap, + unsigned long *ena_tc_bitmap) +{ + u8 tc; + + /* Some TC(s) may be missing after reset, adjust for replay */ + ice_for_each_traffic_class(tc) + if (ice_is_tc_ena(*tc_bitmap, tc) && + (ice_sched_get_tc_node(pi, tc))) + set_bit(tc, ena_tc_bitmap); +} + +/** + * ice_sched_replay_agg - recreate aggregator node(s) + * @hw: pointer to the HW struct + * + * This function recreate aggregator type nodes which are not replayed earlier. + * It also replay aggregator BW information. These aggregator nodes are not + * associated with VSI type node yet. + */ +void ice_sched_replay_agg(struct ice_hw *hw) +{ + struct ice_port_info *pi = hw->port_info; + struct ice_sched_agg_info *agg_info; + + mutex_lock(&pi->sched_lock); + list_for_each_entry(agg_info, &hw->agg_list, list_entry) + /* replay aggregator (re-create aggregator node) */ + if (!bitmap_equal(agg_info->tc_bitmap, agg_info->replay_tc_bitmap, + ICE_MAX_TRAFFIC_CLASS)) { + DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS); + enum ice_status status; + + bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS); + ice_sched_get_ena_tc_bitmap(pi, + agg_info->replay_tc_bitmap, + replay_bitmap); + status = ice_sched_cfg_agg(hw->port_info, + agg_info->agg_id, + ICE_AGG_TYPE_AGG, + replay_bitmap); + if (status) { + dev_info(ice_hw_to_dev(hw), + "Replay agg id[%d] failed\n", + agg_info->agg_id); + /* Move on to next one */ + continue; + } + } + mutex_unlock(&pi->sched_lock); +} + +/** + * ice_sched_replay_agg_vsi_preinit - Agg/VSI replay pre initialization + * @hw: pointer to the HW struct + * + * This function initialize aggregator(s) TC bitmap to zero. A required + * preinit step for replaying aggregators. 
+ */ +void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw) +{ + struct ice_port_info *pi = hw->port_info; + struct ice_sched_agg_info *agg_info; + + mutex_lock(&pi->sched_lock); + list_for_each_entry(agg_info, &hw->agg_list, list_entry) { + struct ice_sched_agg_vsi_info *agg_vsi_info; + + agg_info->tc_bitmap[0] = 0; + list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, + list_entry) + agg_vsi_info->tc_bitmap[0] = 0; + } + mutex_unlock(&pi->sched_lock); +} + +/** + * ice_sched_replay_vsi_agg - replay aggregator & VSI to aggregator node(s) + * @hw: pointer to the HW struct + * @vsi_handle: software VSI handle + * + * This function replays aggregator node, VSI to aggregator type nodes, and + * their node bandwidth information. This function needs to be called with + * scheduler lock held. + */ +static enum ice_status +ice_sched_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle) +{ + DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS); + struct ice_sched_agg_vsi_info *agg_vsi_info; + struct ice_port_info *pi = hw->port_info; + struct ice_sched_agg_info *agg_info; + enum ice_status status; + + bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS); + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + agg_info = ice_get_vsi_agg_info(hw, vsi_handle); + if (!agg_info) + return 0; /* Not present in list - default Agg case */ + agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle); + if (!agg_vsi_info) + return 0; /* Not present in list - default Agg case */ + ice_sched_get_ena_tc_bitmap(pi, agg_info->replay_tc_bitmap, + replay_bitmap); + /* Replay aggregator node associated to vsi_handle */ + status = ice_sched_cfg_agg(hw->port_info, agg_info->agg_id, + ICE_AGG_TYPE_AGG, replay_bitmap); + if (status) + return status; + + bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS); + ice_sched_get_ena_tc_bitmap(pi, agg_vsi_info->replay_tc_bitmap, + replay_bitmap); + /* Move this VSI (vsi_handle) to above aggregator */ + return ice_sched_assoc_vsi_to_agg(pi, 
agg_info->agg_id, vsi_handle, + replay_bitmap); +} + +/** + * ice_replay_vsi_agg - replay VSI to aggregator node + * @hw: pointer to the HW struct + * @vsi_handle: software VSI handle + * + * This function replays association of VSI to aggregator type nodes, and + * node bandwidth information. + */ +enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle) +{ + struct ice_port_info *pi = hw->port_info; + enum ice_status status; + + mutex_lock(&pi->sched_lock); + status = ice_sched_replay_vsi_agg(hw, vsi_handle); + mutex_unlock(&pi->sched_lock); + return status; +} + +/** * ice_sched_replay_q_bw - replay queue type node BW * @pi: port information structure * @q_ctx: queue context structure diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 0e55ae0d446f..9beef8f0ec76 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -8,6 +8,7 @@ #define ICE_QGRP_LAYER_OFFSET 2 #define ICE_VSI_LAYER_OFFSET 4 +#define ICE_AGG_LAYER_OFFSET 6 #define ICE_SCHED_INVAL_LAYER_NUM 0xFF /* Burst size is a 12 bits register that is configured while creating the RL * profile(s). 
MSB is a granularity bit and tells the granularity type @@ -23,12 +24,16 @@ ((BIT(11) - 1) * 64) /* In Bytes */ #define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY ICE_MAX_BURST_SIZE_ALLOWED -#define ICE_RL_PROF_FREQUENCY 446000000 #define ICE_RL_PROF_ACCURACY_BYTES 128 #define ICE_RL_PROF_MULTIPLIER 10000 #define ICE_RL_PROF_TS_MULTIPLIER 32 #define ICE_RL_PROF_FRACTION 512 +#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059 +#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667 +#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571 +#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000 + /* BW rate limit profile parameters list entry along * with bandwidth maintained per layer in port info */ @@ -43,6 +48,8 @@ struct ice_sched_agg_vsi_info { struct list_head list_entry; DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); u16 vsi_handle; + /* save aggregator VSI TC bitmap */ + DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS); }; struct ice_sched_agg_info { @@ -51,6 +58,8 @@ struct ice_sched_agg_info { DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); u32 agg_id; enum ice_agg_type agg_type; + /* save aggregator TC bitmap */ + DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS); }; /* FW AQ command calls */ @@ -60,6 +69,8 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, u16 *elems_ret, struct ice_sq_cd *cd); enum ice_status ice_sched_init_port(struct ice_port_info *pi); enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw); +void ice_sched_get_psm_clk_freq(struct ice_hw *hw); + void ice_sched_clear_port(struct ice_port_info *pi); void ice_sched_cleanup_all(struct ice_hw *hw); void ice_sched_clear_agg(struct ice_hw *hw); @@ -78,6 +89,14 @@ enum ice_status ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, u8 owner, bool enable); enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle); + +/* Tx scheduler rate limiter functions */ +enum ice_status +ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, + enum ice_agg_type agg_type, u8 
tc_bitmap); +enum ice_status +ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle, + u8 tc_bitmap); enum ice_status ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type, u32 bw); @@ -85,6 +104,9 @@ enum ice_status ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type); enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); +void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw); +void ice_sched_replay_agg(struct ice_hw *hw); +enum ice_status ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx); #endif /* _ICE_SCHED_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index c33612132ddf..67c965a3f5d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -603,7 +603,7 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) } } while (req_desc && !status); - devm_kfree(ice_hw_to_dev(hw), (void *)rbuf); + devm_kfree(ice_hw_to_dev(hw), rbuf); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8ca63c6a6ba4..580419813bb2 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1497,22 +1497,11 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) struct ice_vsi *vsi = q_vector->vsi; u32 itr_val; - /* when exiting WB_ON_ITR lets set a low ITR value and trigger - * interrupts to expire right away in case we have more work ready to go - * already + /* when exiting WB_ON_ITR just reset the countdown and let ITR + * resume it's normal "interrupts-enabled" path */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) { - itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS); - 
wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); - /* set target back to last user set value */ - rx->target_itr = rx->itr_setting; - /* set current to what we just wrote and dynamic if needed */ - rx->current_itr = ICE_WB_ON_ITR_USECS | - (rx->itr_setting & ICE_ITR_DYNAMIC); - /* allow normal interrupt flow to start */ + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) q_vector->itr_countdown = 0; - return; - } /* This will do nothing if dynamic updates are not enabled */ ice_update_itr(q_vector, tx); @@ -1552,10 +1541,8 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, q_vector->vsi->state)) - wr32(&q_vector->vsi->back->hw, - GLINT_DYN_CTL(q_vector->reg_idx), - itr_val); + if (!test_bit(__ICE_DOWN, vsi->state)) + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** @@ -1565,30 +1552,29 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) * We need to tell hardware to write-back completed descriptors even when * interrupts are disabled. Descriptors will be written back on cache line * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR - * descriptors may not be written back if they don't fill a cache line until the - * next interrupt. + * descriptors may not be written back if they don't fill a cache line until + * the next interrupt. * - * This sets the write-back frequency to 2 microseconds as that is the minimum - * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to - * make sure hardware knows we aren't meddling with the INTENA_M bit. + * This sets the write-back frequency to whatever was set previously for the + * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we + * aren't meddling with the INTENA_M bit. 
*/ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; - /* already in WB_ON_ITR mode no need to change it */ + /* already in wb_on_itr mode no need to change it */ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) return; - if (q_vector->num_ring_rx) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), - ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, - ICE_RX_ITR)); - - if (q_vector->num_ring_tx) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), - ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, - ICE_TX_ITR)); + /* use previously set ITR values for all of the ITR indices by + * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and + * be static in non-adaptive mode (user configured) + */ + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), + ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) & + GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | + GLINT_DYN_CTL_WB_ON_ITR_M); q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; } @@ -1655,8 +1641,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget) } /* If work not completed, return budget and polling will return */ - if (!clean_complete) + if (!clean_complete) { + /* Set the writeback on ITR so partial completions of + * cache-lines will still continue even if we're polling. 
+ */ + ice_set_wb_on_itr(q_vector); return budget; + } /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling @@ -2413,7 +2404,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ if (unlikely(skb->priority == TC_PRIO_CONTROL && vsi->type == ICE_VSI_PF && - vsi->port_info->is_sw_lldp)) + vsi->port_info->qos_cfg.is_sw_lldp)) offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index ff1a1cbd078e..db56a0c8bfe1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -240,7 +240,6 @@ enum ice_rx_dtype { #define ICE_DFLT_INTRL 0 #define ICE_MAX_INTRL 236 -#define ICE_WB_ON_ITR_USECS 2 #define ICE_IN_WB_ON_ITR_MODE 255 /* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. 
Also, diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index a98800a91045..a6cb0c35748c 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -403,7 +403,11 @@ struct ice_link_default_override_tlv { #define ice_for_each_traffic_class(_i) \ for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++) +/* ICE_DFLT_AGG_ID means that all new VM(s)/VSI node connects + * to driver defined policy for default aggregator + */ #define ICE_INVAL_TEID 0xFFFFFFFF +#define ICE_DFLT_AGG_ID 0 struct ice_sched_node { struct ice_sched_node *parent; @@ -552,6 +556,14 @@ struct ice_dcbx_cfg { #define ICE_DCBX_APPS_NON_WILLING 0x1 }; +struct ice_qos_cfg { + struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */ + struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */ + struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */ + u8 dcbx_status : 3; /* see ICE_DCBX_STATUS_DIS */ + u8 is_sw_lldp : 1; +}; + struct ice_port_info { struct ice_sched_node *root; /* Root Node per Port */ struct ice_hw *hw; /* back pointer to HW instance */ @@ -575,13 +587,7 @@ struct ice_port_info { sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM]; /* List contain profile ID(s) and other params per layer */ struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM]; - struct ice_dcbx_cfg local_dcbx_cfg; /* Oper/Local Cfg */ - /* DCBX info */ - struct ice_dcbx_cfg remote_dcbx_cfg; /* Peer Cfg */ - struct ice_dcbx_cfg desired_dcbx_cfg; /* CEE Desired Cfg */ - /* LLDP/DCBX Status */ - u8 dcbx_status:3; /* see ICE_DCBX_STATUS_DIS */ - u8 is_sw_lldp:1; + struct ice_qos_cfg qos_cfg; u8 is_vf:1; }; @@ -614,6 +620,8 @@ struct ice_hw { void *back; struct ice_aqc_layer_props *layer_info; struct ice_port_info *port_info; + /* PSM clock frequency for calculating RL profile params */ + u32 psm_clk_freq; u64 debug_mask; /* bitmap for debug mask */ enum ice_mac_type mac_type; @@ -902,4 +910,9 @@ struct ice_hw_port_stats { 
/* Hash redirection LUT for VSI - maximum array size */ #define ICE_VSIQF_HLUT_ARRAY_SIZE ((VSIQF_HLUT_MAX_INDEX + 1) * 4) +/* AQ API version for LLDP_FILTER_CONTROL */ +#define ICE_FW_API_LLDP_FLTR_MAJ 1 +#define ICE_FW_API_LLDP_FLTR_MIN 7 +#define ICE_FW_API_LLDP_FLTR_PATCH 1 + #endif /* _ICE_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index ec7f6c64132e..bf5fd812ea0e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1057,11 +1057,45 @@ static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf) } /** + * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config + * @vsi: Pointer to VSI + * + * This function moves VSI into corresponding scheduler aggregator node + * based on cached value of "aggregator node info" per VSI + */ +static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + + if (!vsi->agg_node) + return; + + dev = ice_pf_to_dev(pf); + if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) { + dev_dbg(dev, + "agg_id %u already has reached max_num_vsis %u\n", + vsi->agg_node->agg_id, vsi->agg_node->num_vsis); + return; + } + + status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id, + vsi->idx, vsi->tc_cfg.ena_tc); + if (status) + dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node", + vsi->idx, vsi->agg_node->agg_id); + else + vsi->agg_node->num_vsis++; +} + +/** * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset * @vf: VF to rebuild host configuration on */ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) { + struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; struct device *dev = ice_pf_to_dev(vf->pf); ice_vf_set_host_trust_cfg(vf); @@ -1073,6 +1107,8 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) if 
(ice_vf_rebuild_host_vlan_cfg(vf)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ + ice_vf_rebuild_aggregator_node_cfg(vsi); } /** @@ -1677,6 +1713,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); + if (pf->lag) + ice_enable_lag(pf->lag); return 0; } @@ -1688,6 +1726,8 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; + if (pf->lag) + ice_disable_lag(pf->lag); return num_vfs; } @@ -2312,12 +2352,12 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf) static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + bool rm_promisc, alluni = false, allmulti = false; struct virtchnl_promisc_info *info = (struct virtchnl_promisc_info *)msg; struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; struct device *dev; - bool rm_promisc; int ret = 0; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -2344,8 +2384,13 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) && - !(info->flags & FLAG_VF_MULTICAST_PROMISC); + if (info->flags & FLAG_VF_UNICAST_PROMISC) + alluni = true; + + if (info->flags & FLAG_VF_MULTICAST_PROMISC) + allmulti = true; + + rm_promisc = !allmulti && !alluni; if (vsi->num_vlan || vf->port_vlan_info) { struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); @@ -2399,12 +2444,12 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) enum ice_status status; u8 promisc_m; - if (info->flags & FLAG_VF_UNICAST_PROMISC) { + if (alluni) { if (vf->port_vlan_info || vsi->num_vlan) promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; else promisc_m = ICE_UCAST_PROMISC_BITS; - } else if (info->flags & FLAG_VF_MULTICAST_PROMISC) { + } else if (allmulti) { if (vf->port_vlan_info || vsi->num_vlan) promisc_m = 
ICE_MCAST_VLAN_PROMISC_BITS; else @@ -2432,15 +2477,16 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) } } - if (info->flags & FLAG_VF_MULTICAST_PROMISC) - set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); - else - clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); + if (allmulti && + !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully set multicast promiscuous mode\n", vf->vf_id); + else if (!allmulti && test_and_clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n", vf->vf_id); - if (info->flags & FLAG_VF_UNICAST_PROMISC) - set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); - else - clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); + if (alluni && !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully set unicast promiscuous mode\n", vf->vf_id); + else if (!alluni && test_and_clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) + dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n", vf->vf_id); error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 1782146db644..875fa0cbef56 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -260,45 +260,6 @@ free_buf: } /** - * ice_xsk_alloc_pools - allocate a buffer pool for an XDP socket - * @vsi: VSI to allocate the buffer pool on - * - * Returns 0 on success, negative on error - */ -static int ice_xsk_alloc_pools(struct ice_vsi *vsi) -{ - if (vsi->xsk_pools) - return 0; - - vsi->xsk_pools = kcalloc(vsi->num_xsk_pools, sizeof(*vsi->xsk_pools), - GFP_KERNEL); - - if (!vsi->xsk_pools) { - vsi->num_xsk_pools = 0; - return -ENOMEM; - } - - return 0; -} - -/** - * ice_xsk_remove_pool - Remove an buffer pool for a certain ring/qid - * @vsi: VSI from which the VSI 
will be removed - * @qid: Ring/qid associated with the buffer pool - */ -static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid) -{ - vsi->xsk_pools[qid] = NULL; - vsi->num_xsk_pools_used--; - - if (vsi->num_xsk_pools_used == 0) { - kfree(vsi->xsk_pools); - vsi->xsk_pools = NULL; - vsi->num_xsk_pools = 0; - } -} - -/** * ice_xsk_pool_disable - disable a buffer pool region * @vsi: Current VSI * @qid: queue ID @@ -307,12 +268,12 @@ static void ice_xsk_remove_pool(struct ice_vsi *vsi, u16 qid) */ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid) { - if (!vsi->xsk_pools || qid >= vsi->num_xsk_pools || - !vsi->xsk_pools[qid]) + struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid); + + if (!pool) return -EINVAL; - xsk_pool_dma_unmap(vsi->xsk_pools[qid], ICE_RX_DMA_ATTR); - ice_xsk_remove_pool(vsi, qid); + xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR); return 0; } @@ -333,22 +294,11 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) if (vsi->type != ICE_VSI_PF) return -EINVAL; - if (!vsi->num_xsk_pools) - vsi->num_xsk_pools = min_t(u16, vsi->num_rxq, vsi->num_txq); - if (qid >= vsi->num_xsk_pools) + if (qid >= vsi->netdev->real_num_rx_queues || + qid >= vsi->netdev->real_num_tx_queues) return -EINVAL; - err = ice_xsk_alloc_pools(vsi); - if (err) - return err; - - if (vsi->xsk_pools && vsi->xsk_pools[qid]) - return -EBUSY; - - vsi->xsk_pools[qid] = pool; - vsi->num_xsk_pools_used++; - - err = xsk_pool_dma_map(vsi->xsk_pools[qid], ice_pf_to_dev(vsi->back), + err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back), ICE_RX_DMA_ATTR); if (err) return err; @@ -842,11 +792,8 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) { int i; - if (!vsi->xsk_pools) - return false; - - for (i = 0; i < vsi->num_xsk_pools; i++) { - if (vsi->xsk_pools[i]) + ice_for_each_rxq(vsi, i) { + if (xsk_get_pool_from_qid(vsi->netdev, i)) return true; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h 
b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 6bd7e405e830..da8715297a9a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -60,6 +60,9 @@ /* Top Registers */ #define MVPP2_MH_REG(port) (0x5040 + 4 * (port)) #define MVPP2_DSA_EXTENDED BIT(5) +#define MVPP2_VER_ID_REG 0x50b0 +#define MVPP2_VER_PP22 0x10 +#define MVPP2_VER_PP23 0x11 /* Parser Registers */ #define MVPP2_PRS_INIT_LOOKUP_REG 0x1000 @@ -292,6 +295,8 @@ #define MVPP2_PON_CAUSE_TXP_OCCUP_DESC_ALL_MASK 0x3fc00000 #define MVPP2_PON_CAUSE_MISC_SUM_MASK BIT(31) #define MVPP2_ISR_MISC_CAUSE_REG 0x55b0 +#define MVPP2_ISR_RX_ERR_CAUSE_REG(port) (0x5520 + 4 * (port)) +#define MVPP2_ISR_RX_ERR_CAUSE_NONOCC_MASK 0x00ff /* Buffer Manager registers */ #define MVPP2_BM_POOL_BASE_REG(pool) (0x6000 + ((pool) * 4)) @@ -319,6 +324,10 @@ #define MVPP2_BM_HIGH_THRESH_MASK 0x7f0000 #define MVPP2_BM_HIGH_THRESH_VALUE(val) ((val) << \ MVPP2_BM_HIGH_THRESH_OFFS) +#define MVPP2_BM_BPPI_HIGH_THRESH 0x1E +#define MVPP2_BM_BPPI_LOW_THRESH 0x1C +#define MVPP23_BM_BPPI_HIGH_THRESH 0x34 +#define MVPP23_BM_BPPI_LOW_THRESH 0x28 #define MVPP2_BM_INTR_CAUSE_REG(pool) (0x6240 + ((pool) * 4)) #define MVPP2_BM_RELEASED_DELAY_MASK BIT(0) #define MVPP2_BM_ALLOC_FAILED_MASK BIT(1) @@ -347,6 +356,10 @@ #define MVPP2_OVERRUN_ETH_DROP 0x7000 #define MVPP2_CLS_ETH_DROP 0x7020 +#define MVPP22_BM_POOL_BASE_ADDR_HIGH_REG 0x6310 +#define MVPP22_BM_POOL_BASE_ADDR_HIGH_MASK 0xff +#define MVPP23_BM_8POOL_MODE BIT(8) + /* Hit counters registers */ #define MVPP2_CTRS_IDX 0x7040 #define MVPP22_CTRS_TX_CTR(port, txq) ((txq) | ((port) << 3) | BIT(7)) @@ -469,7 +482,7 @@ #define MVPP22_GMAC_INT_SUM_MASK_LINK_STAT BIT(1) #define MVPP22_GMAC_INT_SUM_MASK_PTP BIT(2) -/* Per-port XGMAC registers. PPv2.2 only, only for GOP port 0, +/* Per-port XGMAC registers. PPv2.2 and PPv2.3, only for GOP port 0, * relative to port->base. 
*/ #define MVPP22_XLG_CTRL0_REG 0x100 @@ -506,7 +519,7 @@ #define MVPP22_XLG_CTRL4_MACMODSELECT_GMAC BIT(12) #define MVPP22_XLG_CTRL4_EN_IDLE_CHECK BIT(14) -/* SMI registers. PPv2.2 only, relative to priv->iface_base. */ +/* SMI registers. PPv2.2 and PPv2.3, relative to priv->iface_base. */ #define MVPP22_SMI_MISC_CFG_REG 0x1204 #define MVPP22_SMI_POLLING_EN BIT(10) @@ -582,7 +595,7 @@ #define MVPP2_QUEUE_NEXT_DESC(q, index) \ (((index) < (q)->last_desc) ? ((index) + 1) : 0) -/* XPCS registers. PPv2.2 only */ +/* XPCS registers.PPv2.2 and PPv2.3 */ #define MVPP22_MPCS_BASE(port) (0x7000 + (port) * 0x1000) #define MVPP22_MPCS_CTRL 0x14 #define MVPP22_MPCS_CTRL_FWD_ERR_CONN BIT(10) @@ -593,7 +606,16 @@ #define MVPP22_MPCS_CLK_RESET_DIV_RATIO(n) ((n) << 4) #define MVPP22_MPCS_CLK_RESET_DIV_SET BIT(11) -/* XPCS registers. PPv2.2 only */ +/* FCA registers. PPv2.2 and PPv2.3 */ +#define MVPP22_FCA_BASE(port) (0x7600 + (port) * 0x1000) +#define MVPP22_FCA_REG_SIZE 16 +#define MVPP22_FCA_REG_MASK 0xFFFF +#define MVPP22_FCA_CONTROL_REG 0x0 +#define MVPP22_FCA_ENABLE_PERIODIC BIT(11) +#define MVPP22_PERIODIC_COUNTER_LSB_REG (0x110) +#define MVPP22_PERIODIC_COUNTER_MSB_REG (0x114) + +/* XPCS registers. 
PPv2.2 and PPv2.3 */ #define MVPP22_XPCS_BASE(port) (0x7400 + (port) * 0x1000) #define MVPP22_XPCS_CFG0 0x0 #define MVPP22_XPCS_CFG0_RESET_DIS BIT(0) @@ -712,8 +734,8 @@ #define MVPP2_PORT_MAX_RXQ 32 /* Max number of Rx descriptors */ -#define MVPP2_MAX_RXD_MAX 1024 -#define MVPP2_MAX_RXD_DFLT 128 +#define MVPP2_MAX_RXD_MAX 2048 +#define MVPP2_MAX_RXD_DFLT 1024 /* Max number of Tx descriptors */ #define MVPP2_MAX_TXD_MAX 2048 @@ -748,6 +770,66 @@ #define MVPP2_TX_FIFO_THRESHOLD(kb) \ ((kb) * 1024 - MVPP2_TX_FIFO_THRESHOLD_MIN) +/* RX FIFO threshold in 1KB granularity */ +#define MVPP23_PORT0_FIFO_TRSH (9 * 1024) +#define MVPP23_PORT1_FIFO_TRSH (4 * 1024) +#define MVPP23_PORT2_FIFO_TRSH (2 * 1024) + +/* RX Flow Control Registers */ +#define MVPP2_RX_FC_REG(port) (0x150 + 4 * (port)) +#define MVPP2_RX_FC_EN BIT(24) +#define MVPP2_RX_FC_TRSH_OFFS 16 +#define MVPP2_RX_FC_TRSH_MASK (0xFF << MVPP2_RX_FC_TRSH_OFFS) +#define MVPP2_RX_FC_TRSH_UNIT 256 + +/* MSS Flow control */ +#define MSS_FC_COM_REG 0 +#define FLOW_CONTROL_ENABLE_BIT BIT(0) +#define FLOW_CONTROL_UPDATE_COMMAND_BIT BIT(31) +#define FC_QUANTA 0xFFFF +#define FC_CLK_DIVIDER 100 + +#define MSS_RXQ_TRESH_BASE 0x200 +#define MSS_RXQ_TRESH_OFFS 4 +#define MSS_RXQ_TRESH_REG(q, fq) (MSS_RXQ_TRESH_BASE + (((q) + (fq)) \ + * MSS_RXQ_TRESH_OFFS)) + +#define MSS_BUF_POOL_BASE 0x40 +#define MSS_BUF_POOL_OFFS 4 +#define MSS_BUF_POOL_REG(id) (MSS_BUF_POOL_BASE \ + + (id) * MSS_BUF_POOL_OFFS) + +#define MSS_BUF_POOL_STOP_MASK 0xFFF +#define MSS_BUF_POOL_START_MASK (0xFFF << MSS_BUF_POOL_START_OFFS) +#define MSS_BUF_POOL_START_OFFS 12 +#define MSS_BUF_POOL_PORTS_MASK (0xF << MSS_BUF_POOL_PORTS_OFFS) +#define MSS_BUF_POOL_PORTS_OFFS 24 +#define MSS_BUF_POOL_PORT_OFFS(id) (0x1 << \ + ((id) + MSS_BUF_POOL_PORTS_OFFS)) + +#define MSS_RXQ_TRESH_START_MASK 0xFFFF +#define MSS_RXQ_TRESH_STOP_MASK (0xFFFF << MSS_RXQ_TRESH_STOP_OFFS) +#define MSS_RXQ_TRESH_STOP_OFFS 16 + +#define MSS_RXQ_ASS_BASE 0x80 +#define MSS_RXQ_ASS_OFFS 4 
+#define MSS_RXQ_ASS_PER_REG 4 +#define MSS_RXQ_ASS_PER_OFFS 8 +#define MSS_RXQ_ASS_PORTID_OFFS 0 +#define MSS_RXQ_ASS_PORTID_MASK 0x3 +#define MSS_RXQ_ASS_HOSTID_OFFS 2 +#define MSS_RXQ_ASS_HOSTID_MASK 0x3F + +#define MSS_RXQ_ASS_Q_BASE(q, fq) ((((q) + (fq)) % MSS_RXQ_ASS_PER_REG) \ + * MSS_RXQ_ASS_PER_OFFS) +#define MSS_RXQ_ASS_PQ_BASE(q, fq) ((((q) + (fq)) / MSS_RXQ_ASS_PER_REG) \ + * MSS_RXQ_ASS_OFFS) +#define MSS_RXQ_ASS_REG(q, fq) (MSS_RXQ_ASS_BASE + MSS_RXQ_ASS_PQ_BASE(q, fq)) + +#define MSS_THRESHOLD_STOP 768 +#define MSS_THRESHOLD_START 1024 +#define MSS_FC_MAX_TIMEOUT 5000 + /* RX buffer constants */ #define MVPP2_SKB_SHINFO_SIZE \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) @@ -845,8 +927,8 @@ enum mvpp22_ptp_packet_format { #define MVPP22_PTP_TIMESTAMPQUEUESELECT BIT(18) /* BM constants */ -#define MVPP2_BM_JUMBO_BUF_NUM 512 -#define MVPP2_BM_LONG_BUF_NUM 1024 +#define MVPP2_BM_JUMBO_BUF_NUM 2048 +#define MVPP2_BM_LONG_BUF_NUM 2048 #define MVPP2_BM_SHORT_BUF_NUM 2048 #define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4) #define MVPP2_BM_POOL_PTR_ALIGN 128 @@ -925,16 +1007,18 @@ struct mvpp2 { /* Shared registers' base addresses */ void __iomem *lms_base; void __iomem *iface_base; + void __iomem *cm3_base; - /* On PPv2.2, each "software thread" can access the base + /* On PPv2.2 and PPv2.3, each "software thread" can access the base * register through a separate address space, each 64 KB apart * from each other. Typically, such address spaces will be * used per CPU. */ void __iomem *swth_base[MVPP2_MAX_THREADS]; - /* On PPv2.2, some port control registers are located into the system - * controller space. These registers are accessible through a regmap. + /* On PPv2.2 and PPv2.3, some port control registers are located into + * the system controller space. These registers are accessible + * through a regmap. 
*/ struct regmap *sysctrl_base; @@ -976,7 +1060,7 @@ struct mvpp2 { u32 tclk; /* HW version */ - enum { MVPP21, MVPP22 } hw_version; + enum { MVPP21, MVPP22, MVPP23 } hw_version; /* Maximum number of RXQs per port */ unsigned int max_port_rxqs; @@ -996,6 +1080,12 @@ struct mvpp2 { /* page_pool allocator */ struct page_pool *page_pool[MVPP2_PORT_MAX_RXQ]; + + /* Global TX Flow Control config */ + bool global_tx_fc; + + /* Spinlocks for CM3 shared memory configuration */ + spinlock_t mss_spinlock; }; struct mvpp2_pcpu_stats { @@ -1158,6 +1248,9 @@ struct mvpp2_port { bool rx_hwtstamp; enum hwtstamp_tx_types tx_hwtstamp_type; struct mvpp2_hwtstamp_queue tx_hwtstamp_queue[2]; + + /* Firmware TX flow control */ + bool tx_fc; }; /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the @@ -1220,7 +1313,7 @@ struct mvpp21_rx_desc { __le32 reserved8; }; -/* HW TX descriptor for PPv2.2 */ +/* HW TX descriptor for PPv2.2 and PPv2.3 */ struct mvpp22_tx_desc { __le32 command; u8 packet_offset; @@ -1232,7 +1325,7 @@ struct mvpp22_tx_desc { __le64 buf_cookie_misc; }; -/* HW RX descriptor for PPv2.2 */ +/* HW RX descriptor for PPv2.2 and PPv2.3 */ struct mvpp22_rx_desc { __le32 status; __le16 reserved1; @@ -1418,6 +1511,8 @@ void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name); void mvpp2_dbgfs_cleanup(struct mvpp2 *priv); +void mvpp23_rx_fifo_fc_en(struct mvpp2 *priv, int port, bool en); + #ifdef CONFIG_MVPP2_PTP int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv); void mvpp22_tai_tstamp(struct mvpp2_tai *tai, u32 tstamp, @@ -1450,4 +1545,5 @@ static inline bool mvpp22_rx_hwtstamping(struct mvpp2_port *port) { return IS_ENABLED(CONFIG_MVPP2_PTP) && port->rx_hwtstamp; } + #endif diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 143522908477..e88272f8b600 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -91,6 +91,16 @@ static 
inline u32 mvpp2_cpu_to_thread(struct mvpp2 *priv, int cpu) return cpu % priv->nthreads; } +static void mvpp2_cm3_write(struct mvpp2 *priv, u32 offset, u32 data) +{ + writel(data, priv->cm3_base + offset); +} + +static u32 mvpp2_cm3_read(struct mvpp2 *priv, u32 offset) +{ + return readl(priv->cm3_base + offset); +} + static struct page_pool * mvpp2_create_page_pool(struct device *dev, int num, int len, enum dma_data_direction dma_dir) @@ -319,7 +329,7 @@ static int mvpp2_get_nrxqs(struct mvpp2 *priv) { unsigned int nrxqs; - if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) + if (priv->hw_version != MVPP21 && queue_mode == MVPP2_QDIST_SINGLE_MODE) return 1; /* According to the PPv2.2 datasheet and our experiments on @@ -384,7 +394,7 @@ static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv, if (!IS_ALIGNED(size, 16)) return -EINVAL; - /* PPv2.1 needs 8 bytes per buffer pointer, PPv2.2 needs 16 + /* PPv2.1 needs 8 bytes per buffer pointer, PPv2.2 and PPv2.3 needs 16 * bytes per buffer pointer */ if (priv->hw_version == MVPP21) @@ -413,6 +423,19 @@ static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv, val = mvpp2_read(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id)); val |= MVPP2_BM_START_MASK; + + val &= ~MVPP2_BM_LOW_THRESH_MASK; + val &= ~MVPP2_BM_HIGH_THRESH_MASK; + + /* Set 8 Pools BPPI threshold for MVPP23 */ + if (priv->hw_version == MVPP23) { + val |= MVPP2_BM_LOW_THRESH_VALUE(MVPP23_BM_BPPI_LOW_THRESH); + val |= MVPP2_BM_HIGH_THRESH_VALUE(MVPP23_BM_BPPI_HIGH_THRESH); + } else { + val |= MVPP2_BM_LOW_THRESH_VALUE(MVPP2_BM_BPPI_LOW_THRESH); + val |= MVPP2_BM_HIGH_THRESH_VALUE(MVPP2_BM_BPPI_HIGH_THRESH); + } + mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val); bm_pool->size = size; @@ -446,7 +469,7 @@ static void mvpp2_bm_bufs_get_addrs(struct device *dev, struct mvpp2 *priv, MVPP2_BM_PHY_ALLOC_REG(bm_pool->id)); *phys_addr = mvpp2_thread_read(priv, thread, MVPP2_BM_VIRT_ALLOC_REG); - if 
(priv->hw_version == MVPP22) { + if (priv->hw_version != MVPP21) { u32 val; u32 dma_addr_highbits, phys_addr_highbits; @@ -581,6 +604,16 @@ err_unroll_pools: return err; } +/* Routine enable PPv23 8 pool mode */ +static void mvpp23_bm_set_8pool_mode(struct mvpp2 *priv) +{ + int val; + + val = mvpp2_read(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG); + val |= MVPP23_BM_8POOL_MODE; + mvpp2_write(priv, MVPP22_BM_POOL_BASE_ADDR_HIGH_REG, val); +} + static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) { enum dma_data_direction dma_dir = DMA_FROM_DEVICE; @@ -634,6 +667,9 @@ static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv) if (!priv->bm_pools) return -ENOMEM; + if (priv->hw_version == MVPP23) + mvpp23_bm_set_8pool_mode(priv); + err = mvpp2_bm_pools_init(dev, priv); if (err < 0) return err; @@ -731,6 +767,191 @@ static void *mvpp2_buf_alloc(struct mvpp2_port *port, return data; } +/* Routine enable flow control for RXQs condition */ +static void mvpp2_rxq_enable_fc(struct mvpp2_port *port) +{ + int val, cm3_state, host_id, q; + int fq = port->first_rxq; + unsigned long flags; + + spin_lock_irqsave(&port->priv->mss_spinlock, flags); + + /* Remove Flow control enable bit to prevent race between FW and Kernel + * If Flow control was enabled, it would be re-enabled. 
+ */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + cm3_state = (val & FLOW_CONTROL_ENABLE_BIT); + val &= ~FLOW_CONTROL_ENABLE_BIT; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + /* Set same Flow control for all RXQs */ + for (q = 0; q < port->nrxqs; q++) { + /* Set stop and start Flow control RXQ thresholds */ + val = MSS_THRESHOLD_START; + val |= (MSS_THRESHOLD_STOP << MSS_RXQ_TRESH_STOP_OFFS); + mvpp2_cm3_write(port->priv, MSS_RXQ_TRESH_REG(q, fq), val); + + val = mvpp2_cm3_read(port->priv, MSS_RXQ_ASS_REG(q, fq)); + /* Set RXQ port ID */ + val &= ~(MSS_RXQ_ASS_PORTID_MASK << MSS_RXQ_ASS_Q_BASE(q, fq)); + val |= (port->id << MSS_RXQ_ASS_Q_BASE(q, fq)); + val &= ~(MSS_RXQ_ASS_HOSTID_MASK << (MSS_RXQ_ASS_Q_BASE(q, fq) + + MSS_RXQ_ASS_HOSTID_OFFS)); + + /* Calculate RXQ host ID: + * In Single queue mode: Host ID equal to Host ID used for + * shared RX interrupt + * In Multi queue mode: Host ID equal to number of + * RXQ ID / number of CoS queues + * In Single resource mode: Host ID always equal to 0 + */ + if (queue_mode == MVPP2_QDIST_SINGLE_MODE) + host_id = port->nqvecs; + else if (queue_mode == MVPP2_QDIST_MULTI_MODE) + host_id = q; + else + host_id = 0; + + /* Set RXQ host ID */ + val |= (host_id << (MSS_RXQ_ASS_Q_BASE(q, fq) + + MSS_RXQ_ASS_HOSTID_OFFS)); + + mvpp2_cm3_write(port->priv, MSS_RXQ_ASS_REG(q, fq), val); + } + + /* Notify Firmware that Flow control config space ready for update */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + val |= FLOW_CONTROL_UPDATE_COMMAND_BIT; + val |= cm3_state; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + spin_unlock_irqrestore(&port->priv->mss_spinlock, flags); +} + +/* Routine disable flow control for RXQs condition */ +static void mvpp2_rxq_disable_fc(struct mvpp2_port *port) +{ + int val, cm3_state, q; + unsigned long flags; + int fq = port->first_rxq; + + spin_lock_irqsave(&port->priv->mss_spinlock, flags); + + /* Remove Flow control enable bit to prevent race between FW and Kernel + 
* If Flow control was enabled, it would be re-enabled. + */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + cm3_state = (val & FLOW_CONTROL_ENABLE_BIT); + val &= ~FLOW_CONTROL_ENABLE_BIT; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + /* Disable Flow control for all RXQs */ + for (q = 0; q < port->nrxqs; q++) { + /* Set threshold 0 to disable Flow control */ + val = 0; + val |= (0 << MSS_RXQ_TRESH_STOP_OFFS); + mvpp2_cm3_write(port->priv, MSS_RXQ_TRESH_REG(q, fq), val); + + val = mvpp2_cm3_read(port->priv, MSS_RXQ_ASS_REG(q, fq)); + + val &= ~(MSS_RXQ_ASS_PORTID_MASK << MSS_RXQ_ASS_Q_BASE(q, fq)); + + val &= ~(MSS_RXQ_ASS_HOSTID_MASK << (MSS_RXQ_ASS_Q_BASE(q, fq) + + MSS_RXQ_ASS_HOSTID_OFFS)); + + mvpp2_cm3_write(port->priv, MSS_RXQ_ASS_REG(q, fq), val); + } + + /* Notify Firmware that Flow control config space ready for update */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + val |= FLOW_CONTROL_UPDATE_COMMAND_BIT; + val |= cm3_state; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + spin_unlock_irqrestore(&port->priv->mss_spinlock, flags); +} + +/* Routine disable/enable flow control for BM pool condition */ +static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, + struct mvpp2_bm_pool *pool, + bool en) +{ + int val, cm3_state; + unsigned long flags; + + spin_lock_irqsave(&port->priv->mss_spinlock, flags); + + /* Remove Flow control enable bit to prevent race between FW and Kernel + * If Flow control were enabled, it would be re-enabled. 
+ */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + cm3_state = (val & FLOW_CONTROL_ENABLE_BIT); + val &= ~FLOW_CONTROL_ENABLE_BIT; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + /* Check if BM pool should be enabled/disable */ + if (en) { + /* Set BM pool start and stop thresholds per port */ + val = mvpp2_cm3_read(port->priv, MSS_BUF_POOL_REG(pool->id)); + val |= MSS_BUF_POOL_PORT_OFFS(port->id); + val &= ~MSS_BUF_POOL_START_MASK; + val |= (MSS_THRESHOLD_START << MSS_BUF_POOL_START_OFFS); + val &= ~MSS_BUF_POOL_STOP_MASK; + val |= MSS_THRESHOLD_STOP; + mvpp2_cm3_write(port->priv, MSS_BUF_POOL_REG(pool->id), val); + } else { + /* Remove BM pool from the port */ + val = mvpp2_cm3_read(port->priv, MSS_BUF_POOL_REG(pool->id)); + val &= ~MSS_BUF_POOL_PORT_OFFS(port->id); + + /* Zero BM pool start and stop thresholds to disable pool + * flow control if pool empty (not used by any port) + */ + if (!pool->buf_num) { + val &= ~MSS_BUF_POOL_START_MASK; + val &= ~MSS_BUF_POOL_STOP_MASK; + } + + mvpp2_cm3_write(port->priv, MSS_BUF_POOL_REG(pool->id), val); + } + + /* Notify Firmware that Flow control config space ready for update */ + val = mvpp2_cm3_read(port->priv, MSS_FC_COM_REG); + val |= FLOW_CONTROL_UPDATE_COMMAND_BIT; + val |= cm3_state; + mvpp2_cm3_write(port->priv, MSS_FC_COM_REG, val); + + spin_unlock_irqrestore(&port->priv->mss_spinlock, flags); +} + +static int mvpp2_enable_global_fc(struct mvpp2 *priv) +{ + int val, timeout = 0; + + /* Enable global flow control. In this stage global + * flow control enabled, but still disabled per port. 
+ */ + val = mvpp2_cm3_read(priv, MSS_FC_COM_REG); + val |= FLOW_CONTROL_ENABLE_BIT; + mvpp2_cm3_write(priv, MSS_FC_COM_REG, val); + + /* Check if Firmware running and disable FC if not*/ + val |= FLOW_CONTROL_UPDATE_COMMAND_BIT; + mvpp2_cm3_write(priv, MSS_FC_COM_REG, val); + + while (timeout < MSS_FC_MAX_TIMEOUT) { + val = mvpp2_cm3_read(priv, MSS_FC_COM_REG); + + if (!(val & FLOW_CONTROL_UPDATE_COMMAND_BIT)) + return 0; + usleep_range(10, 20); + timeout++; + } + + priv->global_tx_fc = false; + return -EOPNOTSUPP; +} + /* Release buffer to BM */ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool, dma_addr_t buf_dma_addr, @@ -742,7 +963,7 @@ static inline void mvpp2_bm_pool_put(struct mvpp2_port *port, int pool, if (test_bit(thread, &port->priv->lock_map)) spin_lock_irqsave(&port->bm_lock[thread], flags); - if (port->priv->hw_version == MVPP22) { + if (port->priv->hw_version != MVPP21) { u32 val = 0; if (sizeof(dma_addr_t) == 8) @@ -1061,6 +1282,16 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) new_long_pool = MVPP2_BM_LONG; if (new_long_pool != port->pool_long->id) { + if (port->tx_fc) { + if (pkt_size > MVPP2_BM_LONG_PKT_SIZE) + mvpp2_bm_pool_update_fc(port, + port->pool_short, + false); + else + mvpp2_bm_pool_update_fc(port, port->pool_long, + false); + } + /* Remove port from old short & long pool */ port->pool_long = mvpp2_bm_pool_use(port, port->pool_long->id, port->pool_long->pkt_size); @@ -1078,6 +1309,25 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu) mvpp2_swf_bm_pool_init(port); mvpp2_set_hw_csum(port, new_long_pool); + + if (port->tx_fc) { + if (pkt_size > MVPP2_BM_LONG_PKT_SIZE) + mvpp2_bm_pool_update_fc(port, port->pool_long, + true); + else + mvpp2_bm_pool_update_fc(port, port->pool_short, + true); + } + + /* Update L4 checksum when jumbo enable/disable on port */ + if (new_long_pool == MVPP2_BM_JUMBO && port->id != 0) { + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + 
dev->hw_features &= ~(NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM); + } else { + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + } } out_set: @@ -1133,14 +1383,19 @@ static inline void mvpp2_qvec_interrupt_disable(struct mvpp2_queue_vector *qvec) static void mvpp2_interrupts_mask(void *arg) { struct mvpp2_port *port = arg; + int cpu = smp_processor_id(); + u32 thread; /* If the thread isn't used, don't do anything */ - if (smp_processor_id() > port->priv->nthreads) + if (cpu > port->priv->nthreads) return; - mvpp2_thread_write(port->priv, - mvpp2_cpu_to_thread(port->priv, smp_processor_id()), + thread = mvpp2_cpu_to_thread(port->priv, cpu); + + mvpp2_thread_write(port->priv, thread, MVPP2_ISR_RX_TX_MASK_REG(port->id), 0); + mvpp2_thread_write(port->priv, thread, + MVPP2_ISR_RX_ERR_CAUSE_REG(port->id), 0); } /* Unmask the current thread's Rx/Tx interrupts. @@ -1150,20 +1405,25 @@ static void mvpp2_interrupts_mask(void *arg) static void mvpp2_interrupts_unmask(void *arg) { struct mvpp2_port *port = arg; - u32 val; + int cpu = smp_processor_id(); + u32 val, thread; /* If the thread isn't used, don't do anything */ - if (smp_processor_id() > port->priv->nthreads) + if (cpu >= port->priv->nthreads) return; + thread = mvpp2_cpu_to_thread(port->priv, cpu); + val = MVPP2_CAUSE_MISC_SUM_MASK | MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK(port->priv->hw_version); if (port->has_tx_irqs) val |= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; - mvpp2_thread_write(port->priv, - mvpp2_cpu_to_thread(port->priv, smp_processor_id()), + mvpp2_thread_write(port->priv, thread, MVPP2_ISR_RX_TX_MASK_REG(port->id), val); + mvpp2_thread_write(port->priv, thread, + MVPP2_ISR_RX_ERR_CAUSE_REG(port->id), + MVPP2_ISR_RX_ERR_CAUSE_NONOCC_MASK); } static void @@ -1172,7 +1432,7 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) u32 val; int i; - if (port->priv->hw_version != MVPP22) + if (port->priv->hw_version == MVPP21) return; if 
(mask) @@ -1188,6 +1448,9 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask) mvpp2_thread_write(port->priv, v->sw_thread_id, MVPP2_ISR_RX_TX_MASK_REG(port->id), val); + mvpp2_thread_write(port->priv, v->sw_thread_id, + MVPP2_ISR_RX_ERR_CAUSE_REG(port->id), + MVPP2_ISR_RX_ERR_CAUSE_NONOCC_MASK); } } @@ -1199,7 +1462,7 @@ static bool mvpp2_port_supports_xlg(struct mvpp2_port *port) static bool mvpp2_port_supports_rgmii(struct mvpp2_port *port) { - return !(port->priv->hw_version == MVPP22 && port->gop_id == 0); + return !(port->priv->hw_version != MVPP21 && port->gop_id == 0); } /* Port configuration routines */ @@ -1280,6 +1543,49 @@ static void mvpp22_gop_init_10gkr(struct mvpp2_port *port) writel(val, mpcs + MVPP22_MPCS_CLK_RESET); } +static void mvpp22_gop_fca_enable_periodic(struct mvpp2_port *port, bool en) +{ + struct mvpp2 *priv = port->priv; + void __iomem *fca = priv->iface_base + MVPP22_FCA_BASE(port->gop_id); + u32 val; + + val = readl(fca + MVPP22_FCA_CONTROL_REG); + val &= ~MVPP22_FCA_ENABLE_PERIODIC; + if (en) + val |= MVPP22_FCA_ENABLE_PERIODIC; + writel(val, fca + MVPP22_FCA_CONTROL_REG); +} + +static void mvpp22_gop_fca_set_timer(struct mvpp2_port *port, u32 timer) +{ + struct mvpp2 *priv = port->priv; + void __iomem *fca = priv->iface_base + MVPP22_FCA_BASE(port->gop_id); + u32 lsb, msb; + + lsb = timer & MVPP22_FCA_REG_MASK; + msb = timer >> MVPP22_FCA_REG_SIZE; + + writel(lsb, fca + MVPP22_PERIODIC_COUNTER_LSB_REG); + writel(msb, fca + MVPP22_PERIODIC_COUNTER_MSB_REG); +} + +/* Set Flow Control timer x100 faster than pause quanta to ensure that link + * partner won't send traffic if port is in XOFF mode. 
+ */ +static void mvpp22_gop_fca_set_periodic_timer(struct mvpp2_port *port) +{ + u32 timer; + + timer = (port->priv->tclk / (USEC_PER_SEC * FC_CLK_DIVIDER)) + * FC_QUANTA; + + mvpp22_gop_fca_enable_periodic(port, false); + + mvpp22_gop_fca_set_timer(port, timer); + + mvpp22_gop_fca_enable_periodic(port, true); +} + static int mvpp22_gop_init(struct mvpp2_port *port) { struct mvpp2 *priv = port->priv; @@ -1324,6 +1630,8 @@ static int mvpp22_gop_init(struct mvpp2_port *port) val |= GENCONF_SOFT_RESET1_GOP; regmap_write(priv->sysctrl_base, GENCONF_SOFT_RESET1, val); + mvpp22_gop_fca_set_periodic_timer(port); + unsupported_conf: return 0; @@ -1817,7 +2125,7 @@ static void mvpp2_mac_reset_assert(struct mvpp2_port *port) MVPP2_GMAC_PORT_RESET_MASK; writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); - if (port->priv->hw_version == MVPP22 && port->gop_id == 0) { + if (port->priv->hw_version != MVPP21 && port->gop_id == 0) { val = readl(port->base + MVPP22_XLG_CTRL0_REG) & ~MVPP22_XLG_CTRL0_MAC_RESET_DIS; writel(val, port->base + MVPP22_XLG_CTRL0_REG); @@ -1830,7 +2138,7 @@ static void mvpp22_pcs_reset_assert(struct mvpp2_port *port) void __iomem *mpcs, *xpcs; u32 val; - if (port->priv->hw_version != MVPP22 || port->gop_id != 0) + if (port->priv->hw_version == MVPP21 || port->gop_id != 0) return; mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id); @@ -1851,7 +2159,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port) void __iomem *mpcs, *xpcs; u32 val; - if (port->priv->hw_version != MVPP22 || port->gop_id != 0) + if (port->priv->hw_version == MVPP21 || port->gop_id != 0) return; mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id); @@ -2287,7 +2595,7 @@ static void mvpp2_txq_sent_counter_clear(void *arg) int queue; /* If the thread isn't used, don't do anything */ - if (smp_processor_id() > port->priv->nthreads) + if (smp_processor_id() >= port->priv->nthreads) return; for (queue = 0; queue < port->ntxqs; queue++) { @@ -2348,6 +2656,20 @@ static void 
mvpp2_txp_max_tx_size_set(struct mvpp2_port *port) } } +/* Set the number of non-occupied descriptors threshold */ +static void mvpp2_set_rxq_free_tresh(struct mvpp2_port *port, + struct mvpp2_rx_queue *rxq) +{ + u32 val; + + mvpp2_write(port->priv, MVPP2_RXQ_NUM_REG, rxq->id); + + val = mvpp2_read(port->priv, MVPP2_RXQ_THRESH_REG); + val &= ~MVPP2_RXQ_NON_OCCUPIED_MASK; + val |= MSS_THRESHOLD_STOP << MVPP2_RXQ_NON_OCCUPIED_OFFSET; + mvpp2_write(port->priv, MVPP2_RXQ_THRESH_REG, val); +} + /* Set the number of packets that will be received before Rx interrupt * will be generated by HW. */ @@ -2611,6 +2933,9 @@ static int mvpp2_rxq_init(struct mvpp2_port *port, mvpp2_rx_pkts_coal_set(port, rxq); mvpp2_rx_time_coal_set(port, rxq); + /* Set the number of non occupied descriptors threshold */ + mvpp2_set_rxq_free_tresh(port, rxq); + /* Add number of descriptors ready for receiving packets */ mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size); @@ -2928,6 +3253,9 @@ static void mvpp2_cleanup_rxqs(struct mvpp2_port *port) for (queue = 0; queue < port->nrxqs; queue++) mvpp2_rxq_deinit(port, port->rxqs[queue]); + + if (port->tx_fc) + mvpp2_rxq_disable_fc(port); } /* Init all Rx queues for port */ @@ -2940,6 +3268,10 @@ static int mvpp2_setup_rxqs(struct mvpp2_port *port) if (err) goto err_cleanup; } + + if (port->tx_fc) + mvpp2_rxq_enable_fc(port); + return 0; err_cleanup: @@ -4196,7 +4528,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port) /* Enable interrupts on all threads */ mvpp2_interrupts_enable(port); - if (port->priv->hw_version == MVPP22) + if (port->priv->hw_version != MVPP21) mvpp22_mode_reconfigure(port); if (port->phylink) { @@ -4239,6 +4571,8 @@ static int mvpp2_check_ringparam_valid(struct net_device *dev, if (ring->rx_pending > MVPP2_MAX_RXD_MAX) new_rx_pending = MVPP2_MAX_RXD_MAX; + else if (ring->rx_pending < MSS_THRESHOLD_START) + new_rx_pending = MSS_THRESHOLD_START; else if (!IS_ALIGNED(ring->rx_pending, 16)) new_rx_pending = 
ALIGN(ring->rx_pending, 16); @@ -4412,7 +4746,7 @@ static int mvpp2_open(struct net_device *dev) valid = true; } - if (priv->hw_version == MVPP22 && port->port_irq) { + if (priv->hw_version != MVPP21 && port->port_irq) { err = request_irq(port->port_irq, mvpp2_port_isr, 0, dev->name, port); if (err) { @@ -5464,7 +5798,7 @@ static void mvpp2_rx_irqs_setup(struct mvpp2_port *port) return; } - /* Handle the more complicated PPv2.2 case */ + /* Handle the more complicated PPv2.2 and PPv2.3 case */ for (i = 0; i < port->nqvecs; i++) { struct mvpp2_queue_vector *qv = port->qvecs + i; @@ -5641,7 +5975,7 @@ static bool mvpp22_port_has_legacy_tx_irqs(struct device_node *port_node, /* Checks if the port dt description has the required Tx interrupts: * - PPv2.1: there are no such interrupts. - * - PPv2.2: + * - PPv2.2 and PPv2.3: * - The old DTs have: "rx-shared", "tx-cpuX" with X in [0...3] * - The new ones have: "hifX" with X in [0..8] * @@ -5883,6 +6217,11 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Autoneg); phylink_set_port_modes(mask); + if (port->priv->global_tx_fc) { + phylink_set(mask, Pause); + phylink_set(mask, Asym_Pause); + } + switch (state->interface) { case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XAUI: @@ -5973,7 +6312,7 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode, old_ctrl4 = ctrl4 = readl(port->base + MVPP22_GMAC_CTRL_4_REG); ctrl0 &= ~MVPP2_GMAC_PORT_TYPE_MASK; - ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK); + ctrl2 &= ~(MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK | MVPP2_GMAC_FLOW_CTRL_MASK); /* Configure port type */ if (phy_interface_mode_is_8023z(state->interface)) { @@ -6060,7 +6399,7 @@ static int mvpp2__mac_prepare(struct phylink_config *config, unsigned int mode, MVPP2_GMAC_PORT_RESET_MASK, MVPP2_GMAC_PORT_RESET_MASK); - if (port->priv->hw_version == MVPP22) { + if (port->priv->hw_version != MVPP21) { mvpp22_gop_mask_irq(port); 
phy_power_off(port->comphy); @@ -6114,7 +6453,7 @@ static int mvpp2_mac_finish(struct phylink_config *config, unsigned int mode, { struct mvpp2_port *port = mvpp2_phylink_to_port(config); - if (port->priv->hw_version == MVPP22 && + if (port->priv->hw_version != MVPP21 && port->phy_interface != interface) { port->phy_interface = interface; @@ -6162,6 +6501,7 @@ static void mvpp2_mac_link_up(struct phylink_config *config, { struct mvpp2_port *port = mvpp2_phylink_to_port(config); u32 val; + int i; if (mvpp2_is_xlg(interface)) { if (!phylink_autoneg_inband(mode)) { @@ -6212,6 +6552,23 @@ static void mvpp2_mac_link_up(struct phylink_config *config, val); } + if (port->priv->global_tx_fc) { + port->tx_fc = tx_pause; + if (tx_pause) + mvpp2_rxq_enable_fc(port); + else + mvpp2_rxq_disable_fc(port); + if (port->priv->percpu_pools) { + for (i = 0; i < port->nrxqs; i++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], tx_pause); + } else { + mvpp2_bm_pool_update_fc(port, port->pool_long, tx_pause); + mvpp2_bm_pool_update_fc(port, port->pool_short, tx_pause); + } + if (port->priv->hw_version == MVPP23) + mvpp23_rx_fifo_fc_en(port->priv, port->id, tx_pause); + } + mvpp2_port_enable(port); mvpp2_egress_enable(port); @@ -6629,7 +6986,7 @@ static void mvpp22_rx_fifo_set_hw(struct mvpp2 *priv, int port, int data_size) mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), attr_size); } -/* Initialize TX FIFO's: the total FIFO size is 48kB on PPv2.2. +/* Initialize TX FIFO's: the total FIFO size is 48kB on PPv2.2 and PPv2.3. * 4kB fixed space must be assigned for the loopback port. * Redistribute remaining avialable 44kB space among all active ports. 
* Guarantee minimum 32kB for 10G port and 8kB for port 1, capable of 2.5G @@ -6678,6 +7035,55 @@ static void mvpp22_rx_fifo_init(struct mvpp2 *priv) mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); } +/* Configure Rx FIFO Flow control thresholds */ +static void mvpp23_rx_fifo_fc_set_tresh(struct mvpp2 *priv) +{ + int port, val; + + /* Port 0: maximum speed -10Gb/s port + * required by spec RX FIFO threshold 9KB + * Port 1: maximum speed -5Gb/s port + * required by spec RX FIFO threshold 4KB + * Port 2: maximum speed -1Gb/s port + * required by spec RX FIFO threshold 2KB + */ + + /* Without loopback port */ + for (port = 0; port < (MVPP2_MAX_PORTS - 1); port++) { + if (port == 0) { + val = (MVPP23_PORT0_FIFO_TRSH / MVPP2_RX_FC_TRSH_UNIT) + << MVPP2_RX_FC_TRSH_OFFS; + val &= MVPP2_RX_FC_TRSH_MASK; + mvpp2_write(priv, MVPP2_RX_FC_REG(port), val); + } else if (port == 1) { + val = (MVPP23_PORT1_FIFO_TRSH / MVPP2_RX_FC_TRSH_UNIT) + << MVPP2_RX_FC_TRSH_OFFS; + val &= MVPP2_RX_FC_TRSH_MASK; + mvpp2_write(priv, MVPP2_RX_FC_REG(port), val); + } else { + val = (MVPP23_PORT2_FIFO_TRSH / MVPP2_RX_FC_TRSH_UNIT) + << MVPP2_RX_FC_TRSH_OFFS; + val &= MVPP2_RX_FC_TRSH_MASK; + mvpp2_write(priv, MVPP2_RX_FC_REG(port), val); + } + } +} + +/* Configure Rx FIFO Flow control thresholds */ +void mvpp23_rx_fifo_fc_en(struct mvpp2 *priv, int port, bool en) +{ + int val; + + val = mvpp2_read(priv, MVPP2_RX_FC_REG(port)); + + if (en) + val |= MVPP2_RX_FC_EN; + else + val &= ~MVPP2_RX_FC_EN; + + mvpp2_write(priv, MVPP2_RX_FC_REG(port), val); +} + static void mvpp22_tx_fifo_set_hw(struct mvpp2 *priv, int port, int size) { int threshold = MVPP2_TX_FIFO_THRESHOLD(size); @@ -6686,7 +7092,7 @@ static void mvpp22_tx_fifo_set_hw(struct mvpp2 *priv, int port, int size) mvpp2_write(priv, MVPP22_TX_FIFO_THRESH_REG(port), threshold); } -/* Initialize TX FIFO's: the total FIFO size is 19kB on PPv2.2. +/* Initialize TX FIFO's: the total FIFO size is 19kB on PPv2.2 and PPv2.3. 
* 3kB fixed space must be assigned for the loopback port. * Redistribute remaining avialable 16kB space among all active ports. * The 10G interface should use 10kB (which is maximum possible size @@ -6794,7 +7200,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) if (dram_target_info) mvpp2_conf_mbus_windows(dram_target_info, priv); - if (priv->hw_version == MVPP22) + if (priv->hw_version != MVPP21) mvpp2_axi_init(priv); /* Disable HW PHY polling */ @@ -6829,6 +7235,8 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) } else { mvpp22_rx_fifo_init(priv); mvpp22_tx_fifo_init(priv); + if (priv->hw_version == MVPP23) + mvpp23_rx_fifo_fc_set_tresh(priv); } if (priv->hw_version == MVPP21) @@ -6854,6 +7262,27 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) return 0; } +static int mvpp2_get_sram(struct platform_device *pdev, + struct mvpp2 *priv) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + if (!res) { + if (has_acpi_companion(&pdev->dev)) + dev_warn(&pdev->dev, "ACPI is too old, Flow control not supported\n"); + else + dev_warn(&pdev->dev, "DT is too old, Flow control not supported\n"); + return 0; + } + + priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->cm3_base)) + return PTR_ERR(priv->cm3_base); + + return 0; +} + static int mvpp2_probe(struct platform_device *pdev) { const struct acpi_device_id *acpi_id; @@ -6910,9 +7339,18 @@ static int mvpp2_probe(struct platform_device *pdev) priv->iface_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->iface_base)) return PTR_ERR(priv->iface_base); + + /* Map CM3 SRAM */ + err = mvpp2_get_sram(pdev, priv); + if (err) + dev_warn(&pdev->dev, "Fail to alloc CM3 SRAM\n"); + + /* Enable global Flow Control only if handler to SRAM not NULL */ + if (priv->cm3_base) + priv->global_tx_fc = true; } - if (priv->hw_version == MVPP22 && dev_of_node(&pdev->dev)) { + if (priv->hw_version 
!= MVPP21 && dev_of_node(&pdev->dev)) { priv->sysctrl_base = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "marvell,system-controller"); @@ -6925,7 +7363,7 @@ static int mvpp2_probe(struct platform_device *pdev) priv->sysctrl_base = NULL; } - if (priv->hw_version == MVPP22 && + if (priv->hw_version != MVPP21 && mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS) priv->percpu_pools = 1; @@ -6970,7 +7408,7 @@ static int mvpp2_probe(struct platform_device *pdev) if (err < 0) goto err_pp_clk; - if (priv->hw_version == MVPP22) { + if (priv->hw_version != MVPP21) { priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk"); if (IS_ERR(priv->mg_clk)) { err = PTR_ERR(priv->mg_clk); @@ -7011,7 +7449,7 @@ static int mvpp2_probe(struct platform_device *pdev) return -EINVAL; } - if (priv->hw_version == MVPP22) { + if (priv->hw_version != MVPP21) { err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK); if (err) goto err_axi_clk; @@ -7031,6 +7469,14 @@ static int mvpp2_probe(struct platform_device *pdev) priv->port_map |= BIT(i); } + if (priv->hw_version != MVPP21) { + if (mvpp2_read(priv, MVPP2_VER_ID_REG) == MVPP2_VER_PP23) + priv->hw_version = MVPP23; + } + + /* Init mss lock */ + spin_lock_init(&priv->mss_spinlock); + /* Initialize network controller */ err = mvpp2_init(pdev, priv); if (err < 0) { @@ -7070,6 +7516,12 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_port_probe; } + if (priv->global_tx_fc && priv->hw_version != MVPP21) { + err = mvpp2_enable_global_fc(priv); + if (err) + dev_warn(&pdev->dev, "Minimum of CM3 firmware 18.09 and chip revision B0 required for flow control\n"); + } + mvpp2_dbgfs_init(priv, pdev->name); platform_set_drvdata(pdev, priv); @@ -7086,10 +7538,10 @@ err_axi_clk: clk_disable_unprepare(priv->axi_clk); err_mg_core_clk: - if (priv->hw_version == MVPP22) + if (priv->hw_version != MVPP21) clk_disable_unprepare(priv->mg_core_clk); err_mg_clk: - if (priv->hw_version == MVPP22) + if (priv->hw_version != MVPP21) 
clk_disable_unprepare(priv->mg_clk); err_gop_clk: clk_disable_unprepare(priv->gop_clk); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 0257c987be0c..4812cdb4609e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1172,6 +1172,21 @@ static void mvpp2_prs_mh_init(struct mvpp2 *priv) /* Update shadow table and hw entry */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MH); mvpp2_prs_hw_write(priv, &pe); + + /* Set MH entry that skip parser */ + pe.index = MVPP2_PE_MH_SKIP_PRS; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MH); + mvpp2_prs_sram_shift_set(&pe, MVPP2_MH_SIZE, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, 0); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MH); + mvpp2_prs_hw_write(priv, &pe); } /* Set default entires (place holder) for promiscuous, non-promiscuous and diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index 4b68dd374733..c16e5b9947bd 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -103,10 +103,11 @@ #define MVPP2_PE_MAC_RANGE_START (MVPP2_PE_MAC_RANGE_END - \ MVPP2_PRS_MAC_RANGE_SIZE + 1) /* VLAN filtering range */ -#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31) +#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 32) #define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \ MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1) #define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_MAC_RANGE_START - 1) +#define MVPP2_PE_MH_SKIP_PRS (MVPP2_PRS_TCAM_SRAM_SIZE - 31) #define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30) #define MVPP2_PE_IP6_ADDR_UN 
(MVPP2_PRS_TCAM_SRAM_SIZE - 29) #define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index eb535c98ca38..1a3455620b38 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -4,10 +4,10 @@ # ccflags-y += -I$(src) -obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o -obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o +obj-$(CONFIG_OCTEONTX2_MBOX) += rvu_mbox.o +obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o -octeontx2_mbox-y := mbox.o rvu_trace.o -octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ +rvu_mbox-y := mbox.o rvu_trace.o +rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \ rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \ - rvu_cpt.o rvu_devlink.o + rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 84a91234ba8e..9caa375d01b1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -14,53 +14,18 @@ #include <linux/pci.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/phy.h> #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_net.h> #include "cgx.h" +#include "rvu.h" +#include "lmac_common.h" -#define DRV_NAME "octeontx2-cgx" -#define DRV_STRING "Marvell OcteonTX2 CGX/MAC Driver" - -/** - * struct lmac - * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion - * @cmd_lock: Lock to serialize the command interface - * @resp: command response - * @link_info: link related information - * @event_cb: callback for linkchange events - * @event_cb_lock: lock for serializing callback with unregister - * @cmd_pend: flag set before new command is started - * flag cleared after command response is received - * @cgx: parent 
cgx port - * @lmac_id: lmac port id - * @name: lmac port name - */ -struct lmac { - wait_queue_head_t wq_cmd_cmplt; - struct mutex cmd_lock; - u64 resp; - struct cgx_link_user_info link_info; - struct cgx_event_cb event_cb; - spinlock_t event_cb_lock; - bool cmd_pend; - struct cgx *cgx; - u8 lmac_id; - char *name; -}; - -struct cgx { - void __iomem *reg_base; - struct pci_dev *pdev; - u8 cgx_id; - u8 lmac_count; - struct lmac *lmac_idmap[MAX_LMAC_PER_CGX]; - struct work_struct cgx_cmd_work; - struct workqueue_struct *cgx_cmd_workq; - struct list_head cgx_list; -}; +#define DRV_NAME "Marvell-CGX/RPM" +#define DRV_STRING "Marvell CGX/RPM Driver" static LIST_HEAD(cgx_list); @@ -76,22 +41,45 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en); /* Supported devices */ static const struct pci_device_id cgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RPM) }, { 0, } /* end of table */ }; MODULE_DEVICE_TABLE(pci, cgx_id_table); -static void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val) +static bool is_dev_rpm(void *cgxd) { - writeq(val, cgx->reg_base + (lmac << 18) + offset); + struct cgx *cgx = cgxd; + + return (cgx->pdev->device == PCI_DEVID_CN10K_RPM); +} + +bool is_lmac_valid(struct cgx *cgx, int lmac_id) +{ + return cgx && test_bit(lmac_id, &cgx->lmac_bmap); } -static u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset) +struct mac_ops *get_mac_ops(void *cgxd) { - return readq(cgx->reg_base + (lmac << 18) + offset); + if (!cgxd) + return cgxd; + + return ((struct cgx *)cgxd)->mac_ops; } -static inline struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx) +void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val) +{ + writeq(val, cgx->reg_base + (lmac << cgx->mac_ops->lmac_offset) + + offset); +} + +u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset) +{ + return readq(cgx->reg_base + (lmac << cgx->mac_ops->lmac_offset) + + offset); +} + +struct lmac 
*lmac_pdata(u8 lmac_id, struct cgx *cgx) { if (!cgx || lmac_id >= MAX_LMAC_PER_CGX) return NULL; @@ -135,6 +123,20 @@ void *cgx_get_pdata(int cgx_id) return NULL; } +void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + + cgx_write(cgx_dev, lmac_id, offset, val); +} + +u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + + return cgx_read(cgx_dev, lmac_id, offset); +} + int cgx_get_cgxid(void *cgxd) { struct cgx *cgx = cgxd; @@ -185,8 +187,10 @@ static u64 mac2u64 (u8 *mac_addr) int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct mac_ops *mac_ops; u64 cfg; + mac_ops = cgx_dev->mac_ops; /* copy 6bytes from macaddr */ /* memcpy(&cfg, mac_addr, 6); */ @@ -205,8 +209,11 @@ int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + struct mac_ops *mac_ops; u64 cfg; + mac_ops = cgx_dev->mac_ops; + cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8); return cfg & CGX_RX_DMAC_ADR_MASK; } @@ -215,15 +222,16 @@ int cgx_set_pkind(void *cgxd, u8 lmac_id, int pkind) { struct cgx *cgx = cgxd; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; cgx_write(cgx, lmac_id, CGXX_CMRX_RX_ID_MAP, (pkind & 0x3F)); return 0; } -static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id) +static u8 cgx_get_lmac_type(void *cgxd, int lmac_id) { + struct cgx *cgx = cgxd; u64 cfg; cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); @@ -237,10 +245,10 @@ int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) u8 lmac_type; u64 cfg; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; - lmac_type = cgx_get_lmac_type(cgx, lmac_id); + lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac_id); if (lmac_type == LMAC_MODE_SGMII 
|| lmac_type == LMAC_MODE_QSGMII) { cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); if (enable) @@ -262,11 +270,13 @@ int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) { struct cgx *cgx = cgx_get_pdata(cgx_id); + struct mac_ops *mac_ops; u64 cfg = 0; if (!cgx) return; + mac_ops = cgx->mac_ops; if (enable) { /* Enable promiscuous mode on LMAC */ cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); @@ -324,7 +334,7 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) { struct cgx *cgx = cgxd; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); return 0; @@ -334,18 +344,77 @@ int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat) { struct cgx *cgx = cgxd; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; *tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8)); return 0; } +u64 cgx_features_get(void *cgxd) +{ + return ((struct cgx *)cgxd)->hw_features; +} + +static int cgx_set_fec_stats_count(struct cgx_link_user_info *linfo) +{ + if (!linfo->fec) + return 0; + + switch (linfo->lmac_type_id) { + case LMAC_MODE_SGMII: + case LMAC_MODE_XAUI: + case LMAC_MODE_RXAUI: + case LMAC_MODE_QSGMII: + return 0; + case LMAC_MODE_10G_R: + case LMAC_MODE_25G_R: + case LMAC_MODE_100G_R: + case LMAC_MODE_USXGMII: + return 1; + case LMAC_MODE_40G_R: + return 4; + case LMAC_MODE_50G_R: + if (linfo->fec == OTX2_FEC_BASER) + return 2; + else + return 1; + default: + return 0; + } +} + +int cgx_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp) +{ + int stats, fec_stats_count = 0; + int corr_reg, uncorr_reg; + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + fec_stats_count = + cgx_set_fec_stats_count(&cgx->lmac_idmap[lmac_id]->link_info); + if 
(cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { + corr_reg = CGXX_SPUX_LNX_FEC_CORR_BLOCKS; + uncorr_reg = CGXX_SPUX_LNX_FEC_UNCORR_BLOCKS; + } else { + corr_reg = CGXX_SPUX_RSFEC_CORR; + uncorr_reg = CGXX_SPUX_RSFEC_UNCORR; + } + for (stats = 0; stats < fec_stats_count; stats++) { + rsp->fec_corr_blks += + cgx_read(cgx, lmac_id, corr_reg + (stats * 8)); + rsp->fec_uncorr_blks += + cgx_read(cgx, lmac_id, uncorr_reg + (stats * 8)); + } + return 0; +} + int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; u64 cfg; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); @@ -362,7 +431,7 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable) struct cgx *cgx = cgxd; u64 cfg, last; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); @@ -377,13 +446,16 @@ int cgx_lmac_tx_enable(void *cgxd, int lmac_id, bool enable) return !!(last & DATA_PKT_TX_EN); } -int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause) +static int cgx_lmac_get_pause_frm_status(void *cgxd, int lmac_id, + u8 *tx_pause, u8 *rx_pause) { struct cgx *cgx = cgxd; u64 cfg; - if (!cgx || lmac_id >= cgx->lmac_count) + if (is_dev_rpm(cgx)) + return 0; + + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); @@ -394,13 +466,16 @@ int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, return 0; } -int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, - u8 tx_pause, u8 rx_pause) +static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id, + u8 tx_pause, u8 rx_pause) { struct cgx *cgx = cgxd; u64 cfg; - if (!cgx || lmac_id >= cgx->lmac_count) + if (is_dev_rpm(cgx)) + return 0; + + if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL); @@ -424,11 +499,12 @@ int 
cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, return 0; } -static void cgx_lmac_pause_frm_config(struct cgx *cgx, int lmac_id, bool enable) +static void cgx_lmac_pause_frm_config(void *cgxd, int lmac_id, bool enable) { + struct cgx *cgx = cgxd; u64 cfg; - if (!cgx || lmac_id >= cgx->lmac_count) + if (!is_lmac_valid(cgx, lmac_id)) return; if (enable) { /* Enable receive pause frames */ @@ -486,6 +562,9 @@ void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) if (!cgx) return; + if (is_dev_rpm(cgx)) + return; + if (enable) { /* Enable inbound PTP timestamping */ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL); @@ -508,7 +587,7 @@ void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable) } /* CGX Firmware interface low level support */ -static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) +int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) { struct cgx *cgx = lmac->cgx; struct device *dev; @@ -556,8 +635,7 @@ unlock: return err; } -static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp, - struct cgx *cgx, int lmac_id) +int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id) { struct lmac *lmac; int err; @@ -592,6 +670,7 @@ static inline void cgx_link_usertable_init(void) cgx_speed_mbps[CGX_LINK_25G] = 25000; cgx_speed_mbps[CGX_LINK_40G] = 40000; cgx_speed_mbps[CGX_LINK_50G] = 50000; + cgx_speed_mbps[CGX_LINK_80G] = 80000; cgx_speed_mbps[CGX_LINK_100G] = 100000; cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII"; @@ -606,6 +685,143 @@ static inline void cgx_link_usertable_init(void) cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII"; } +static int cgx_link_usertable_index_map(int speed) +{ + switch (speed) { + case SPEED_10: + return CGX_LINK_10M; + case SPEED_100: + return CGX_LINK_100M; + case SPEED_1000: + return CGX_LINK_1G; + case SPEED_2500: + return CGX_LINK_2HG; + case SPEED_5000: + return CGX_LINK_5G; + case SPEED_10000: + return CGX_LINK_10G; + case SPEED_20000: + return CGX_LINK_20G; + case 
SPEED_25000: + return CGX_LINK_25G; + case SPEED_40000: + return CGX_LINK_40G; + case SPEED_50000: + return CGX_LINK_50G; + case 80000: + return CGX_LINK_80G; + case SPEED_100000: + return CGX_LINK_100G; + case SPEED_UNKNOWN: + return CGX_LINK_NONE; + } + return CGX_LINK_NONE; +} + +static void set_mod_args(struct cgx_set_link_mode_args *args, + u32 speed, u8 duplex, u8 autoneg, u64 mode) +{ + /* Fill default values incase of user did not pass + * valid parameters + */ + if (args->duplex == DUPLEX_UNKNOWN) + args->duplex = duplex; + if (args->speed == SPEED_UNKNOWN) + args->speed = speed; + if (args->an == AUTONEG_UNKNOWN) + args->an = autoneg; + args->mode = mode; + args->ports = 0; +} + +static void otx2_map_ethtool_link_modes(u64 bitmask, + struct cgx_set_link_mode_args *args) +{ + switch (bitmask) { + case ETHTOOL_LINK_MODE_10baseT_Half_BIT: + set_mod_args(args, 10, 1, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_10baseT_Full_BIT: + set_mod_args(args, 10, 0, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_100baseT_Half_BIT: + set_mod_args(args, 100, 1, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_100baseT_Full_BIT: + set_mod_args(args, 100, 0, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_1000baseT_Half_BIT: + set_mod_args(args, 1000, 1, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_1000baseT_Full_BIT: + set_mod_args(args, 1000, 0, 1, BIT_ULL(CGX_MODE_SGMII)); + break; + case ETHTOOL_LINK_MODE_1000baseX_Full_BIT: + set_mod_args(args, 1000, 0, 0, BIT_ULL(CGX_MODE_1000_BASEX)); + break; + case ETHTOOL_LINK_MODE_10000baseT_Full_BIT: + set_mod_args(args, 1000, 0, 1, BIT_ULL(CGX_MODE_QSGMII)); + break; + case ETHTOOL_LINK_MODE_10000baseSR_Full_BIT: + set_mod_args(args, 10000, 0, 0, BIT_ULL(CGX_MODE_10G_C2C)); + break; + case ETHTOOL_LINK_MODE_10000baseLR_Full_BIT: + set_mod_args(args, 10000, 0, 0, BIT_ULL(CGX_MODE_10G_C2M)); + break; + case ETHTOOL_LINK_MODE_10000baseKR_Full_BIT: + 
set_mod_args(args, 10000, 0, 1, BIT_ULL(CGX_MODE_10G_KR)); + break; + case ETHTOOL_LINK_MODE_25000baseSR_Full_BIT: + set_mod_args(args, 25000, 0, 0, BIT_ULL(CGX_MODE_25G_C2C)); + break; + case ETHTOOL_LINK_MODE_25000baseCR_Full_BIT: + set_mod_args(args, 25000, 0, 1, BIT_ULL(CGX_MODE_25G_CR)); + break; + case ETHTOOL_LINK_MODE_25000baseKR_Full_BIT: + set_mod_args(args, 25000, 0, 1, BIT_ULL(CGX_MODE_25G_KR)); + break; + case ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT: + set_mod_args(args, 40000, 0, 0, BIT_ULL(CGX_MODE_40G_C2C)); + break; + case ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT: + set_mod_args(args, 40000, 0, 0, BIT_ULL(CGX_MODE_40G_C2M)); + break; + case ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT: + set_mod_args(args, 40000, 0, 1, BIT_ULL(CGX_MODE_40G_CR4)); + break; + case ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT: + set_mod_args(args, 40000, 0, 1, BIT_ULL(CGX_MODE_40G_KR4)); + break; + case ETHTOOL_LINK_MODE_50000baseSR_Full_BIT: + set_mod_args(args, 50000, 0, 0, BIT_ULL(CGX_MODE_50G_C2C)); + break; + case ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT: + set_mod_args(args, 50000, 0, 0, BIT_ULL(CGX_MODE_50G_C2M)); + break; + case ETHTOOL_LINK_MODE_50000baseCR_Full_BIT: + set_mod_args(args, 50000, 0, 1, BIT_ULL(CGX_MODE_50G_CR)); + break; + case ETHTOOL_LINK_MODE_50000baseKR_Full_BIT: + set_mod_args(args, 50000, 0, 1, BIT_ULL(CGX_MODE_50G_KR)); + break; + case ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT: + set_mod_args(args, 100000, 0, 0, BIT_ULL(CGX_MODE_100G_C2C)); + break; + case ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT: + set_mod_args(args, 100000, 0, 0, BIT_ULL(CGX_MODE_100G_C2M)); + break; + case ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT: + set_mod_args(args, 100000, 0, 1, BIT_ULL(CGX_MODE_100G_CR4)); + break; + case ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT: + set_mod_args(args, 100000, 0, 1, BIT_ULL(CGX_MODE_100G_KR4)); + break; + default: + set_mod_args(args, 0, 1, 0, BIT_ULL(CGX_MODE_MAX)); + break; + } +} + static inline void link_status_user_format(u64 
lstat, struct cgx_link_user_info *linfo, struct cgx *cgx, u8 lmac_id) @@ -615,6 +831,8 @@ static inline void link_status_user_format(u64 lstat, linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; + linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); + linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id); lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); @@ -642,6 +860,9 @@ static inline void cgx_link_change_handler(u64 lstat, lmac->link_info = event.link_uinfo; linfo = &lmac->link_info; + if (err_type == CGX_ERR_SPEED_CHANGE_INVALID) + return; + /* Ensure callback doesn't get unregistered until we finish it */ spin_lock(&lmac->event_cb_lock); @@ -670,7 +891,8 @@ static inline bool cgx_cmdresp_is_linkevent(u64 event) id = FIELD_GET(EVTREG_ID, event); if (id == CGX_CMD_LINK_BRING_UP || - id == CGX_CMD_LINK_BRING_DOWN) + id == CGX_CMD_LINK_BRING_DOWN || + id == CGX_CMD_MODE_CHANGE) return true; else return false; @@ -686,12 +908,16 @@ static inline bool cgx_event_is_linkevent(u64 event) static irqreturn_t cgx_fwi_event_handler(int irq, void *data) { + u64 event, offset, clear_bit; struct lmac *lmac = data; struct cgx *cgx; - u64 event; cgx = lmac->cgx; + /* Clear SW_INT for RPM and CMR_INT for CGX */ + offset = cgx->mac_ops->int_register; + clear_bit = cgx->mac_ops->int_ena_bit; + event = cgx_read(cgx, lmac->lmac_id, CGX_EVENT_REG); if (!FIELD_GET(EVTREG_ACK, event)) @@ -727,7 +953,7 @@ static irqreturn_t cgx_fwi_event_handler(int irq, void *data) * Ack the interrupt register as well. 
*/ cgx_write(lmac->cgx, lmac->lmac_id, CGX_EVENT_REG, 0); - cgx_write(lmac->cgx, lmac->lmac_id, CGXX_CMRX_INT, FW_CGX_INT); + cgx_write(lmac->cgx, lmac->lmac_id, offset, clear_bit); return IRQ_HANDLED; } @@ -771,20 +997,79 @@ int cgx_get_fwdata_base(u64 *base) { u64 req = 0, resp; struct cgx *cgx; + int first_lmac; int err; cgx = list_first_entry_or_null(&cgx_list, struct cgx, cgx_list); if (!cgx) return -ENXIO; + first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX); req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FWD_BASE, req); - err = cgx_fwi_cmd_generic(req, &resp, cgx, 0); + err = cgx_fwi_cmd_generic(req, &resp, cgx, first_lmac); if (!err) *base = FIELD_GET(RESP_FWD_BASE, resp); return err; } +int cgx_set_link_mode(void *cgxd, struct cgx_set_link_mode_args args, + int cgx_id, int lmac_id) +{ + struct cgx *cgx = cgxd; + u64 req = 0, resp; + + if (!cgx) + return -ENODEV; + + if (args.mode) + otx2_map_ethtool_link_modes(args.mode, &args); + if (!args.speed && args.duplex && !args.an) + return -EINVAL; + + req = FIELD_SET(CMDREG_ID, CGX_CMD_MODE_CHANGE, req); + req = FIELD_SET(CMDMODECHANGE_SPEED, + cgx_link_usertable_index_map(args.speed), req); + req = FIELD_SET(CMDMODECHANGE_DUPLEX, args.duplex, req); + req = FIELD_SET(CMDMODECHANGE_AN, args.an, req); + req = FIELD_SET(CMDMODECHANGE_PORT, args.ports, req); + req = FIELD_SET(CMDMODECHANGE_FLAGS, args.mode, req); + + return cgx_fwi_cmd_generic(req, &resp, cgx, lmac_id); +} +int cgx_set_fec(u64 fec, int cgx_id, int lmac_id) +{ + u64 req = 0, resp; + struct cgx *cgx; + int err = 0; + + cgx = cgx_get_pdata(cgx_id); + if (!cgx) + return -ENXIO; + + req = FIELD_SET(CMDREG_ID, CGX_CMD_SET_FEC, req); + req = FIELD_SET(CMDSETFEC, fec, req); + err = cgx_fwi_cmd_generic(req, &resp, cgx, lmac_id); + if (err) + return err; + + cgx->lmac_idmap[lmac_id]->link_info.fec = + FIELD_GET(RESP_LINKSTAT_FEC, resp); + return cgx->lmac_idmap[lmac_id]->link_info.fec; +} + +int cgx_get_phy_fec_stats(void *cgxd, int lmac_id) +{ + struct 
cgx *cgx = cgxd; + u64 req = 0, resp; + + if (!cgx) + return -ENODEV; + + req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_PHY_FEC_STATS, req); + return cgx_fwi_cmd_generic(req, &resp, cgx, lmac_id); +} + static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable) { u64 req = 0; @@ -800,10 +1085,11 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable) static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx) { + int first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX); u64 req = 0; req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req); - return cgx_fwi_cmd_generic(req, resp, cgx, 0); + return cgx_fwi_cmd_generic(req, resp, cgx, first_lmac); } static int cgx_lmac_verify_fwi_version(struct cgx *cgx) @@ -836,8 +1122,8 @@ static void cgx_lmac_linkup_work(struct work_struct *work) struct device *dev = &cgx->pdev->dev; int i, err; - /* Do Link up for all the lmacs */ - for (i = 0; i < cgx->lmac_count; i++) { + /* Do Link up for all the enabled lmacs */ + for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { err = cgx_fwi_link_change(cgx, i, true); if (err) dev_info(dev, "cgx port %d:%d Link up command failed\n", @@ -857,12 +1143,77 @@ int cgx_lmac_linkup_start(void *cgxd) return 0; } +static void cgx_lmac_get_fifolen(struct cgx *cgx) +{ + u64 cfg; + + cfg = cgx_read(cgx, 0, CGX_CONST); + cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); +} + +static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac, + int cnt, bool req_free) +{ + struct mac_ops *mac_ops = cgx->mac_ops; + u64 offset, ena_bit; + unsigned int irq; + int err; + + irq = pci_irq_vector(cgx->pdev, mac_ops->lmac_fwi + + cnt * mac_ops->irq_offset); + offset = mac_ops->int_set_reg; + ena_bit = mac_ops->int_ena_bit; + + if (req_free) { + free_irq(irq, lmac); + return 0; + } + + err = request_irq(irq, cgx_fwi_event_handler, 0, lmac->name, lmac); + if (err) + return err; + + /* Enable interrupt */ + cgx_write(cgx, lmac->lmac_id, offset, 
ena_bit); + return 0; +} + +int cgx_get_nr_lmacs(void *cgxd) +{ + struct cgx *cgx = cgxd; + + return cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0x7ULL; +} + +u8 cgx_get_lmacid(void *cgxd, u8 lmac_index) +{ + struct cgx *cgx = cgxd; + + return cgx->lmac_idmap[lmac_index]->lmac_id; +} + +unsigned long cgx_get_lmac_bmap(void *cgxd) +{ + struct cgx *cgx = cgxd; + + return cgx->lmac_bmap; +} + static int cgx_lmac_init(struct cgx *cgx) { struct lmac *lmac; + u64 lmac_list; int i, err; - cgx->lmac_count = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0x7; + cgx_lmac_get_fifolen(cgx); + + cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx); + /* lmac_list specifies which lmacs are enabled + * when bit n is set to 1, LMAC[n] is enabled + */ + if (cgx->mac_ops->non_contiguous_serdes_lane) + lmac_list = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL; + if (cgx->lmac_count > MAX_LMAC_PER_CGX) cgx->lmac_count = MAX_LMAC_PER_CGX; @@ -876,24 +1227,25 @@ static int cgx_lmac_init(struct cgx *cgx) goto err_lmac_free; } sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i); - lmac->lmac_id = i; + if (cgx->mac_ops->non_contiguous_serdes_lane) { + lmac->lmac_id = __ffs64(lmac_list); + lmac_list &= ~BIT_ULL(lmac->lmac_id); + } else { + lmac->lmac_id = i; + } + lmac->cgx = cgx; init_waitqueue_head(&lmac->wq_cmd_cmplt); mutex_init(&lmac->cmd_lock); spin_lock_init(&lmac->event_cb_lock); - err = request_irq(pci_irq_vector(cgx->pdev, - CGX_LMAC_FWI + i * 9), - cgx_fwi_event_handler, 0, lmac->name, lmac); + err = cgx_configure_interrupt(cgx, lmac, lmac->lmac_id, false); if (err) goto err_irq; - /* Enable interrupt */ - cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S, - FW_CGX_INT); - /* Add reference */ - cgx->lmac_idmap[i] = lmac; - cgx_lmac_pause_frm_config(cgx, i, true); + cgx->lmac_idmap[lmac->lmac_id] = lmac; + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); + set_bit(lmac->lmac_id, &cgx->lmac_bmap); } return cgx_lmac_verify_fwi_version(cgx); @@ -917,12 +1269,12 @@ static int 
cgx_lmac_exit(struct cgx *cgx) } /* Free all lmac related resources */ - for (i = 0; i < cgx->lmac_count; i++) { - cgx_lmac_pause_frm_config(cgx, i, false); + for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { lmac = cgx->lmac_idmap[i]; if (!lmac) continue; - free_irq(pci_irq_vector(cgx->pdev, CGX_LMAC_FWI + i * 9), lmac); + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, false); + cgx_configure_interrupt(cgx, lmac, lmac->lmac_id, true); kfree(lmac->name); kfree(lmac); } @@ -930,6 +1282,37 @@ static int cgx_lmac_exit(struct cgx *cgx) return 0; } +static void cgx_populate_features(struct cgx *cgx) +{ + if (is_dev_rpm(cgx)) + cgx->hw_features = (RVU_MAC_RPM | RVU_LMAC_FEAT_FC); + else + cgx->hw_features = (RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP); +} + +static struct mac_ops cgx_mac_ops = { + .name = "cgx", + .csr_offset = 0, + .lmac_offset = 18, + .int_register = CGXX_CMRX_INT, + .int_set_reg = CGXX_CMRX_INT_ENA_W1S, + .irq_offset = 9, + .int_ena_bit = FW_CGX_INT, + .lmac_fwi = CGX_LMAC_FWI, + .non_contiguous_serdes_lane = false, + .rx_stats_cnt = 9, + .tx_stats_cnt = 18, + .get_nr_lmacs = cgx_get_nr_lmacs, + .get_lmac_type = cgx_get_lmac_type, + .mac_lmac_intl_lbk = cgx_lmac_internal_loopback, + .mac_get_rx_stats = cgx_get_rx_stats, + .mac_get_tx_stats = cgx_get_tx_stats, + .mac_enadis_rx_pause_fwding = cgx_lmac_enadis_rx_pause_fwding, + .mac_get_pause_frm_status = cgx_lmac_get_pause_frm_status, + .mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm, + .mac_pause_frm_config = cgx_lmac_pause_frm_config, +}; + static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -943,6 +1326,12 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, cgx); + /* Use mac_ops to get MAC specific features */ + if (pdev->device == PCI_DEVID_CN10K_RPM) + cgx->mac_ops = rpm_get_mac_ops(); + else + cgx->mac_ops = &cgx_mac_ops; + err = pci_enable_device(pdev); if (err) { dev_err(dev, 
"Failed to enable PCI device\n"); @@ -964,7 +1353,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_release_regions; } - nvec = CGX_NVEC; + nvec = pci_msix_vec_count(cgx->pdev); err = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); if (err < 0 || err != nvec) { dev_err(dev, "Request for %d msix vectors failed, err %d\n", @@ -988,6 +1377,10 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) cgx_link_usertable_init(); + cgx_populate_features(cgx); + + mutex_init(&cgx->lock); + err = cgx_lmac_init(cgx); if (err) goto err_release_lmac; @@ -1011,8 +1404,10 @@ static void cgx_remove(struct pci_dev *pdev) { struct cgx *cgx = pci_get_drvdata(pdev); - cgx_lmac_exit(cgx); - list_del(&cgx->cgx_list); + if (cgx) { + cgx_lmac_exit(cgx); + list_del(&cgx->cgx_list); + } pci_free_irq_vectors(pdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index bcfc3e5f66bb..12521262164a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -13,6 +13,7 @@ #include "mbox.h" #include "cgx_fw_if.h" +#include "rpm.h" /* PCI device IDs */ #define PCI_DEVID_OCTEONTX2_CGX 0xA059 @@ -42,12 +43,12 @@ #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 #define CGXX_CMRX_RX_LMACS 0x128 -#define CGXX_CMRX_RX_DMAC_CTL0 0x1F8 +#define CGXX_CMRX_RX_DMAC_CTL0 (0x1F8 + mac_ops->csr_offset) #define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) #define CGX_DMAC_CAM_ACCEPT BIT_ULL(3) #define CGX_DMAC_MCAST_MODE BIT_ULL(1) #define CGX_DMAC_BCAST_MODE BIT_ULL(0) -#define CGXX_CMRX_RX_DMAC_CAM0 0x200 +#define CGXX_CMRX_RX_DMAC_CAM0 (0x200 + mac_ops->csr_offset) #define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48) #define CGXX_CMRX_RX_DMAC_CAM1 0x400 #define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0) @@ -55,7 +56,13 @@ #define CGXX_SCRATCH0_REG 0x1050 #define 
CGXX_SCRATCH1_REG 0x1058 #define CGX_CONST 0x2000 +#define CGX_CONST_RXFIFO_SIZE GENMASK_ULL(23, 0) #define CGXX_SPUX_CONTROL1 0x10000 +#define CGXX_SPUX_LNX_FEC_CORR_BLOCKS 0x10700 +#define CGXX_SPUX_LNX_FEC_UNCORR_BLOCKS 0x10800 +#define CGXX_SPUX_RSFEC_CORR 0x10088 +#define CGXX_SPUX_RSFEC_UNCORR 0x10090 + #define CGXX_SPUX_CONTROL1_LBK BIT_ULL(14) #define CGXX_GMP_PCS_MRX_CTL 0x30000 #define CGXX_GMP_PCS_MRX_CTL_LBK BIT_ULL(14) @@ -81,7 +88,6 @@ #define CGX_CMD_TIMEOUT 2200 /* msecs */ #define DEFAULT_PAUSE_TIME 0x7FF -#define CGX_NVEC 37 #define CGX_LMAC_FWI 0 enum cgx_nix_stat_type { @@ -147,5 +153,16 @@ int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause); void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable); u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id); - +int cgx_set_fec(u64 fec, int cgx_id, int lmac_id); +int cgx_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); +int cgx_get_phy_fec_stats(void *cgxd, int lmac_id); +int cgx_set_link_mode(void *cgxd, struct cgx_set_link_mode_args args, + int cgx_id, int lmac_id); +u64 cgx_features_get(void *cgxd); +struct mac_ops *get_mac_ops(void *cgxd); +int cgx_get_nr_lmacs(void *cgxd); +u8 cgx_get_lmacid(void *cgxd, u8 lmac_index); +unsigned long cgx_get_lmac_bmap(void *cgxd); +void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val); +u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h index c3702fa58b6b..aa4e42f78f13 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h @@ -43,7 +43,13 @@ enum cgx_error_type { CGX_ERR_TRAINING_FAIL, CGX_ERR_RX_EQU_FAIL, CGX_ERR_SPUX_BER_FAIL, - CGX_ERR_SPUX_RSFEC_ALGN_FAIL, /* = 22 */ + CGX_ERR_SPUX_RSFEC_ALGN_FAIL, + CGX_ERR_SPUX_MARKER_LOCK_FAIL, + CGX_ERR_SET_FEC_INVALID, + CGX_ERR_SET_FEC_FAIL, + 
CGX_ERR_MODULE_INVALID, + CGX_ERR_MODULE_NOT_PRESENT, + CGX_ERR_SPEED_CHANGE_INVALID, }; /* LINK speed types */ @@ -59,10 +65,41 @@ enum cgx_link_speed { CGX_LINK_25G, CGX_LINK_40G, CGX_LINK_50G, + CGX_LINK_80G, CGX_LINK_100G, CGX_LINK_SPEED_MAX, }; +enum CGX_MODE_ { + CGX_MODE_SGMII, + CGX_MODE_1000_BASEX, + CGX_MODE_QSGMII, + CGX_MODE_10G_C2C, + CGX_MODE_10G_C2M, + CGX_MODE_10G_KR, + CGX_MODE_20G_C2C, + CGX_MODE_25G_C2C, + CGX_MODE_25G_C2M, + CGX_MODE_25G_2_C2C, + CGX_MODE_25G_CR, + CGX_MODE_25G_KR, + CGX_MODE_40G_C2C, + CGX_MODE_40G_C2M, + CGX_MODE_40G_CR4, + CGX_MODE_40G_KR4, + CGX_MODE_40GAUI_C2C, + CGX_MODE_50G_C2C, + CGX_MODE_50G_C2M, + CGX_MODE_50G_4_C2C, + CGX_MODE_50G_CR, + CGX_MODE_50G_KR, + CGX_MODE_80GAUI_C2C, + CGX_MODE_100G_C2C, + CGX_MODE_100G_C2M, + CGX_MODE_100G_CR4, + CGX_MODE_100G_KR4, + CGX_MODE_MAX /* = 29 */ +}; /* REQUEST ID types. Input to firmware */ enum cgx_cmd_id { CGX_CMD_NONE, @@ -75,12 +112,25 @@ enum cgx_cmd_id { CGX_CMD_INTERNAL_LBK, CGX_CMD_EXTERNAL_LBK, CGX_CMD_HIGIG, - CGX_CMD_LINK_STATE_CHANGE, + CGX_CMD_LINK_STAT_CHANGE, CGX_CMD_MODE_CHANGE, /* hot plug support */ CGX_CMD_INTF_SHUTDOWN, CGX_CMD_GET_MKEX_PRFL_SIZE, CGX_CMD_GET_MKEX_PRFL_ADDR, CGX_CMD_GET_FWD_BASE, /* get base address of shared FW data */ + CGX_CMD_GET_LINK_MODES, /* Supported Link Modes */ + CGX_CMD_SET_LINK_MODE, + CGX_CMD_GET_SUPPORTED_FEC, + CGX_CMD_SET_FEC, + CGX_CMD_GET_AN, + CGX_CMD_SET_AN, + CGX_CMD_GET_ADV_LINK_MODES, + CGX_CMD_GET_ADV_FEC, + CGX_CMD_GET_PHY_MOD_TYPE, /* line-side modulation type: NRZ or PAM4 */ + CGX_CMD_SET_PHY_MOD_TYPE, + CGX_CMD_PRBS, + CGX_CMD_DISPLAY_EYE, + CGX_CMD_GET_PHY_FEC_STATS, }; /* async event ids */ @@ -154,6 +204,7 @@ enum cgx_cmd_own { * CGX_STAT_SUCCESS */ #define RESP_FWD_BASE GENMASK_ULL(56, 9) +#define RESP_LINKSTAT_LMAC_TYPE GENMASK_ULL(35, 28) /* Response to cmd ID - CGX_CMD_LINK_BRING_UP/DOWN, event ID CGX_EVT_LINK_CHANGE * status can be either CGX_STAT_FAIL or CGX_STAT_SUCCESS @@ -171,13 +222,19 @@ struct 
cgx_lnk_sts { uint64_t full_duplex:1; uint64_t speed:4; /* cgx_link_speed */ uint64_t err_type:10; - uint64_t reserved2:39; + uint64_t an:1; /* AN supported or not */ + uint64_t fec:2; /* FEC type if enabled, if not 0 */ + uint64_t port:8; + uint64_t reserved2:28; }; #define RESP_LINKSTAT_UP GENMASK_ULL(9, 9) #define RESP_LINKSTAT_FDUPLEX GENMASK_ULL(10, 10) #define RESP_LINKSTAT_SPEED GENMASK_ULL(14, 11) #define RESP_LINKSTAT_ERRTYPE GENMASK_ULL(24, 15) +#define RESP_LINKSTAT_AN GENMASK_ULL(25, 25) +#define RESP_LINKSTAT_FEC GENMASK_ULL(27, 26) +#define RESP_LINKSTAT_PORT GENMASK_ULL(35, 28) /* scratchx(1) CSR used for non-secure SW->ATF communication * This CSR acts as a command register @@ -199,4 +256,12 @@ struct cgx_lnk_sts { #define CMDLINKCHANGE_FULLDPLX BIT_ULL(9) #define CMDLINKCHANGE_SPEED GENMASK_ULL(13, 10) +#define CMDSETFEC GENMASK_ULL(9, 8) +/* command argument to be passed for cmd ID - CGX_CMD_MODE_CHANGE */ +#define CMDMODECHANGE_SPEED GENMASK_ULL(11, 8) +#define CMDMODECHANGE_DUPLEX GENMASK_ULL(12, 12) +#define CMDMODECHANGE_AN GENMASK_ULL(13, 13) +#define CMDMODECHANGE_PORT GENMASK_ULL(21, 14) +#define CMDMODECHANGE_FLAGS GENMASK_ULL(63, 22) + #endif /* __CGX_FW_INTF_H__ */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 17f6f42f4453..e66109367487 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -155,6 +155,8 @@ enum nix_scheduler { #define NIC_HW_MIN_FRS 40 #define NIC_HW_MAX_FRS 9212 #define SDP_HW_MAX_FRS 65535 +#define CN10K_LMAC_LINK_MAX_FRS 16380 /* 16k - FCS */ +#define CN10K_LBK_LINK_MAX_FRS 65535 /* 64k */ /* NIX RX action operation*/ #define NIX_RX_ACTIONOP_DROP (0x0ull) @@ -191,6 +193,9 @@ enum nix_scheduler { #define NIX_LINK_LBK(a) (12 + (a)) #define NIX_CHAN_CGX_LMAC_CHX(a, b, c) (0x800 + 0x100 * (a) + 0x10 * (b) + (c)) #define NIX_CHAN_LBK_CHX(a, b) (0 + 0x100 * (a) + (b)) +#define 
NIX_CHAN_SDP_CH_START (0x700ull) + +#define SDP_CHANNELS 256 /* NIX LSO format indices. * As of now TSO is the only one using, so statically assigning indices. diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h new file mode 100644 index 000000000000..45706fd87120 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RPM driver + * + * Copyright (C) 2020 Marvell. + */ + +#ifndef LMAC_COMMON_H +#define LMAC_COMMON_H + +#include "rvu.h" +#include "cgx.h" +/** + * struct lmac + * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion + * @cmd_lock: Lock to serialize the command interface + * @resp: command response + * @link_info: link related information + * @event_cb: callback for linkchange events + * @event_cb_lock: lock for serializing callback with unregister + * @cmd_pend: flag set before new command is started + * flag cleared after command response is received + * @cgx: parent cgx port + * @lmac_id: lmac port id + * @name: lmac port name + */ +struct lmac { + wait_queue_head_t wq_cmd_cmplt; + /* Lock to serialize the command interface */ + struct mutex cmd_lock; + u64 resp; + struct cgx_link_user_info link_info; + struct cgx_event_cb event_cb; + /* lock for serializing callback with unregister */ + spinlock_t event_cb_lock; + bool cmd_pend; + struct cgx *cgx; + u8 lmac_id; + char *name; +}; + +/* CGX & RPM has different feature set + * update the structure fields with different one + */ +struct mac_ops { + char *name; + /* Features like RXSTAT, TXSTAT, DMAC FILTER csrs differs by fixed + * bar offset for example + * CGX DMAC_CTL0 0x1f8 + * RPM DMAC_CTL0 0x4ff8 + */ + u64 csr_offset; + /* For ATF to send events to kernel, there is no dedicated interrupt + * defined hence CGX uses OVERFLOW bit in CMR_INT. 
RPM block supports + * SW_INT so that ATF triggers this interrupt after processing of + * requested command + */ + u64 int_register; + u64 int_set_reg; + /* lmac offset is different is RPM */ + u8 lmac_offset; + u8 irq_offset; + u8 int_ena_bit; + u8 lmac_fwi; + u32 fifo_len; + bool non_contiguous_serdes_lane; + /* RPM & CGX differs in number of Receive/transmit stats */ + u8 rx_stats_cnt; + u8 tx_stats_cnt; + /* Incase of RPM get number of lmacs from RPMX_CMR_RX_LMACS[LMAC_EXIST] + * number of setbits in lmac_exist tells number of lmacs + */ + int (*get_nr_lmacs)(void *cgx); + u8 (*get_lmac_type)(void *cgx, int lmac_id); + int (*mac_lmac_intl_lbk)(void *cgx, int lmac_id, + bool enable); + /* Register Stats related functions */ + int (*mac_get_rx_stats)(void *cgx, int lmac_id, + int idx, u64 *rx_stat); + int (*mac_get_tx_stats)(void *cgx, int lmac_id, + int idx, u64 *tx_stat); + + /* Enable LMAC Pause Frame Configuration */ + void (*mac_enadis_rx_pause_fwding)(void *cgxd, + int lmac_id, + bool enable); + + int (*mac_get_pause_frm_status)(void *cgxd, + int lmac_id, + u8 *tx_pause, + u8 *rx_pause); + + int (*mac_enadis_pause_frm)(void *cgxd, + int lmac_id, + u8 tx_pause, + u8 rx_pause); + + void (*mac_pause_frm_config)(void *cgxd, + int lmac_id, + bool enable); +}; + +struct cgx { + void __iomem *reg_base; + struct pci_dev *pdev; + u8 cgx_id; + u8 lmac_count; + struct lmac *lmac_idmap[MAX_LMAC_PER_CGX]; + struct work_struct cgx_cmd_work; + struct workqueue_struct *cgx_cmd_workq; + struct list_head cgx_list; + u64 hw_features; + struct mac_ops *mac_ops; + unsigned long lmac_bmap; /* bitmap of enabled lmacs */ + /* Lock to serialize read/write of global csrs like + * RPMX_MTI_STAT_DATA_HI_CDC etc + */ + struct mutex lock; +}; + +typedef struct cgx rpm_t; + +/* Function Declarations */ +void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val); +u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset); +struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx); +int 
cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac); +int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id); +bool is_lmac_valid(struct cgx *cgx, int lmac_id); +struct mac_ops *rpm_get_mac_ops(void); + +#endif /* LMAC_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index bbabb8e64201..0a37ca96aab8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -20,9 +20,9 @@ static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid) { - void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; + void *hw_mbase = mdev->hwbase; tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; @@ -56,12 +56,9 @@ void otx2_mbox_destroy(struct otx2_mbox *mbox) } EXPORT_SYMBOL(otx2_mbox_destroy); -int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev, - void *reg_base, int direction, int ndevs) +static int otx2_mbox_setup(struct otx2_mbox *mbox, struct pci_dev *pdev, + void *reg_base, int direction, int ndevs) { - struct otx2_mbox_dev *mdev; - int devid; - switch (direction) { case MBOX_DIR_AFPF: case MBOX_DIR_PFVF: @@ -121,7 +118,6 @@ int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev, } mbox->reg_base = reg_base; - mbox->hwbase = hwbase; mbox->pdev = pdev; mbox->dev = kcalloc(ndevs, sizeof(struct otx2_mbox_dev), GFP_KERNEL); @@ -129,11 +125,27 @@ int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev, otx2_mbox_destroy(mbox); return -ENOMEM; } - mbox->ndevs = ndevs; + + return 0; +} + +int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev, + void *reg_base, int direction, int ndevs) +{ + struct otx2_mbox_dev *mdev; + int devid, err; + + err = 
otx2_mbox_setup(mbox, pdev, reg_base, direction, ndevs); + if (err) + return err; + + mbox->hwbase = hwbase; + for (devid = 0; devid < ndevs; devid++) { mdev = &mbox->dev[devid]; mdev->mbase = mbox->hwbase + (devid * MBOX_SIZE); + mdev->hwbase = mdev->mbase; spin_lock_init(&mdev->mbox_lock); /* Init header to reset value */ otx2_mbox_reset(mbox, devid); @@ -143,6 +155,35 @@ int otx2_mbox_init(struct otx2_mbox *mbox, void *hwbase, struct pci_dev *pdev, } EXPORT_SYMBOL(otx2_mbox_init); +/* Initialize mailbox with the set of mailbox region addresses + * in the array hwbase. + */ +int otx2_mbox_regions_init(struct otx2_mbox *mbox, void **hwbase, + struct pci_dev *pdev, void *reg_base, + int direction, int ndevs) +{ + struct otx2_mbox_dev *mdev; + int devid, err; + + err = otx2_mbox_setup(mbox, pdev, reg_base, direction, ndevs); + if (err) + return err; + + mbox->hwbase = hwbase[0]; + + for (devid = 0; devid < ndevs; devid++) { + mdev = &mbox->dev[devid]; + mdev->mbase = hwbase[devid]; + mdev->hwbase = hwbase[devid]; + spin_lock_init(&mdev->mbox_lock); + /* Init header to reset value */ + otx2_mbox_reset(mbox, devid); + } + + return 0; +} +EXPORT_SYMBOL(otx2_mbox_regions_init); + int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid) { unsigned long timeout = jiffies + msecs_to_jiffies(MBOX_RSP_TIMEOUT); @@ -175,9 +216,9 @@ EXPORT_SYMBOL(otx2_mbox_busy_poll_for_rsp); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid) { - void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; + void *hw_mbase = mdev->hwbase; tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index a0fa44941204..ea456099b33c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -36,7 +36,7 @@ #define 
INTR_MASK(pfvfs) ((pfvfs < 64) ? (BIT_ULL(pfvfs) - 1) : (~0ull)) -#define MBOX_RSP_TIMEOUT 2000 /* Time(ms) to wait for mbox response */ +#define MBOX_RSP_TIMEOUT 3000 /* Time(ms) to wait for mbox response */ #define MBOX_MSG_ALIGN 16 /* Align mbox msg start to 16bytes */ @@ -52,6 +52,7 @@ struct otx2_mbox_dev { void *mbase; /* This dev's mbox region */ + void *hwbase; spinlock_t mbox_lock; u16 msg_size; /* Total msg size to be sent */ u16 rsp_size; /* Total rsp size to be sure the reply is ok */ @@ -98,6 +99,9 @@ void otx2_mbox_destroy(struct otx2_mbox *mbox); int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase, struct pci_dev *pdev, void __force *reg_base, int direction, int ndevs); +int otx2_mbox_regions_init(struct otx2_mbox *mbox, void __force **hwbase, + struct pci_dev *pdev, void __force *reg_base, + int direction, int ndevs); void otx2_mbox_msg_send(struct otx2_mbox *mbox, int devid); int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid); int otx2_mbox_busy_poll_for_rsp(struct otx2_mbox *mbox, int devid); @@ -149,6 +153,16 @@ M(CGX_PTP_RX_ENABLE, 0x20C, cgx_ptp_rx_enable, msg_req, msg_rsp) \ M(CGX_PTP_RX_DISABLE, 0x20D, cgx_ptp_rx_disable, msg_req, msg_rsp) \ M(CGX_CFG_PAUSE_FRM, 0x20E, cgx_cfg_pause_frm, cgx_pause_frm_cfg, \ cgx_pause_frm_cfg) \ +M(CGX_FEC_SET, 0x210, cgx_set_fec_param, fec_mode, fec_mode) \ +M(CGX_FEC_STATS, 0x211, cgx_fec_stats, msg_req, cgx_fec_stats_rsp) \ +M(CGX_GET_PHY_FEC_STATS, 0x212, cgx_get_phy_fec_stats, msg_req, msg_rsp) \ +M(CGX_FW_DATA_GET, 0x213, cgx_get_aux_link_info, msg_req, cgx_fw_data) \ +M(CGX_SET_LINK_MODE, 0x214, cgx_set_link_mode, cgx_set_link_mode_req,\ + cgx_set_link_mode_rsp) \ +M(CGX_FEATURES_GET, 0x215, cgx_features_get, msg_req, \ + cgx_features_info_msg) \ +M(RPM_STATS, 0x216, rpm_stats, msg_req, rpm_stats_rsp) \ + /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ M(NPA_LF_ALLOC, 0x400, npa_lf_alloc, \ npa_lf_alloc_req, npa_lf_alloc_rsp) \ @@ -237,6 
+251,9 @@ M(NIX_BP_ENABLE, 0x8016, nix_bp_enable, nix_bp_cfg_req, \ nix_bp_cfg_rsp) \ M(NIX_BP_DISABLE, 0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \ M(NIX_GET_MAC_ADDR, 0x8018, nix_get_mac_addr, msg_req, nix_get_mac_addr_rsp) \ +M(NIX_CN10K_AQ_ENQ, 0x8019, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \ + nix_cn10k_aq_enq_rsp) \ +M(NIX_GET_HW_INFO, 0x801a, nix_get_hw_info, msg_req, nix_hw_info) /* Messages initiated by AF (range 0xC00 - 0xDFF) */ #define MBOX_UP_CGX_MESSAGES \ @@ -354,12 +371,17 @@ struct get_hw_cap_rsp { struct cgx_stats_rsp { struct mbox_msghdr hdr; -#define CGX_RX_STATS_COUNT 13 +#define CGX_RX_STATS_COUNT 9 #define CGX_TX_STATS_COUNT 18 u64 rx_stats[CGX_RX_STATS_COUNT]; u64 tx_stats[CGX_TX_STATS_COUNT]; }; +struct cgx_fec_stats_rsp { + struct mbox_msghdr hdr; + u64 fec_corr_blks; + u64 fec_uncorr_blks; +}; /* Structure for requesting the operation for * setting/getting mac address in the CGX interface */ @@ -373,6 +395,8 @@ struct cgx_link_user_info { uint64_t full_duplex:1; uint64_t lmac_type_id:4; uint64_t speed:20; /* speed in Mbps */ + uint64_t an:1; /* AN supported or not */ + uint64_t fec:2; /* FEC type if enabled else 0 */ #define LMACTYPE_STR_LEN 16 char lmac_type[LMACTYPE_STR_LEN]; }; @@ -391,6 +415,98 @@ struct cgx_pause_frm_cfg { u8 tx_pause; }; +enum fec_type { + OTX2_FEC_NONE, + OTX2_FEC_BASER, + OTX2_FEC_RS, + OTX2_FEC_STATS_CNT = 2, + OTX2_FEC_OFF, +}; + +struct fec_mode { + struct mbox_msghdr hdr; + int fec; +}; + +struct sfp_eeprom_s { +#define SFP_EEPROM_SIZE 256 + u16 sff_id; + u8 buf[SFP_EEPROM_SIZE]; + u64 reserved; +}; + +struct phy_s { + struct { + u64 can_change_mod_type:1; + u64 mod_type:1; + u64 has_fec_stats:1; + } misc; + struct fec_stats_s { + u32 rsfec_corr_cws; + u32 rsfec_uncorr_cws; + u32 brfec_corr_blks; + u32 brfec_uncorr_blks; + } fec_stats; +}; + +struct cgx_lmac_fwdata_s { + u16 rw_valid; + u64 supported_fec; + u64 supported_an; + u64 supported_link_modes; + /* only applicable if AN is supported */ + u64 
advertised_fec; + u64 advertised_link_modes; + /* Only applicable if SFP/QSFP slot is present */ + struct sfp_eeprom_s sfp_eeprom; + struct phy_s phy; +#define LMAC_FWDATA_RESERVED_MEM 1021 + u64 reserved[LMAC_FWDATA_RESERVED_MEM]; +}; + +struct cgx_fw_data { + struct mbox_msghdr hdr; + struct cgx_lmac_fwdata_s fwdata; +}; + +struct cgx_set_link_mode_args { + u32 speed; + u8 duplex; + u8 an; + u8 ports; + u64 mode; +}; + +struct cgx_set_link_mode_req { +#define AUTONEG_UNKNOWN 0xff + struct mbox_msghdr hdr; + struct cgx_set_link_mode_args args; +}; + +struct cgx_set_link_mode_rsp { + struct mbox_msghdr hdr; + int status; +}; + +#define RVU_LMAC_FEAT_FC BIT_ULL(0) /* pause frames */ +#define RVU_LMAC_FEAT_PTP BIT_ULL(1) /* precison time protocol */ +#define RVU_MAC_VERSION BIT_ULL(2) +#define RVU_MAC_CGX BIT_ULL(3) +#define RVU_MAC_RPM BIT_ULL(4) + +struct cgx_features_info_msg { + struct mbox_msghdr hdr; + u64 lmac_features; +}; + +struct rpm_stats_rsp { + struct mbox_msghdr hdr; +#define RPM_RX_STATS_COUNT 43 +#define RPM_TX_STATS_COUNT 34 + u64 rx_stats[RPM_RX_STATS_COUNT]; + u64 tx_stats[RPM_TX_STATS_COUNT]; +}; + /* NPA mbox message formats */ /* NPA mailbox error codes @@ -545,6 +661,39 @@ struct nix_lf_free_req { u64 flags; }; +/* CN10K NIX AQ enqueue msg */ +struct nix_cn10k_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_cn10k_rq_ctx_s rq; + struct nix_cn10k_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; + union { + struct nix_cn10k_rq_ctx_s rq_mask; + struct nix_cn10k_sq_ctx_s sq_mask; + struct nix_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + }; +}; + +struct nix_cn10k_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_cn10k_rq_ctx_s rq; + struct nix_cn10k_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; +}; + /* NIX AQ enqueue msg */ struct nix_aq_enq_req { struct 
mbox_msghdr hdr; @@ -809,6 +958,12 @@ struct nix_bp_cfg_rsp { u8 chan_cnt; /* Number of channel for which bpids are assigned */ }; +struct nix_hw_info { + struct mbox_msghdr hdr; + u16 max_mtu; + u16 min_mtu; +}; + /* NPC mbox message structs */ #define NPC_MCAM_ENTRY_INVALID 0xFFFF diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index f69f4f35ae48..1ee37853f338 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -21,6 +21,9 @@ #define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP 0xB300 #define PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP 0xB400 #define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP 0xB500 +#define PCI_SUBSYS_DEVID_CN10K_A_PTP 0xB900 +#define PCI_SUBSYS_DEVID_CNF10K_A_PTP 0xBA00 +#define PCI_SUBSYS_DEVID_CNF10K_B_PTP 0xBC00 #define PCI_DEVID_OCTEONTX2_RST 0xA085 #define PCI_PTP_BAR_NO 0 @@ -234,6 +237,15 @@ static const struct pci_device_id ptp_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, PCI_VENDOR_ID_CAVIUM, PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_CN10K_A_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_CNF10K_A_PTP) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_CNF10K_B_PTP) }, { 0, } }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c new file mode 100644 index 000000000000..a91ccdc59403 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RPM driver + * + * Copyright (C) 2020 Marvell. 
+ * + */ + +#include "cgx.h" +#include "lmac_common.h" + +static struct mac_ops rpm_mac_ops = { + .name = "rpm", + .csr_offset = 0x4e00, + .lmac_offset = 20, + .int_register = RPMX_CMRX_SW_INT, + .int_set_reg = RPMX_CMRX_SW_INT_ENA_W1S, + .irq_offset = 1, + .int_ena_bit = BIT_ULL(0), + .lmac_fwi = RPM_LMAC_FWI, + .non_contiguous_serdes_lane = true, + .rx_stats_cnt = 43, + .tx_stats_cnt = 34, + .get_nr_lmacs = rpm_get_nr_lmacs, + .get_lmac_type = rpm_get_lmac_type, + .mac_lmac_intl_lbk = rpm_lmac_internal_loopback, + .mac_get_rx_stats = rpm_get_rx_stats, + .mac_get_tx_stats = rpm_get_tx_stats, + .mac_enadis_rx_pause_fwding = rpm_lmac_enadis_rx_pause_fwding, + .mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status, + .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm, + .mac_pause_frm_config = rpm_lmac_pause_frm_config, +}; + +struct mac_ops *rpm_get_mac_ops(void) +{ + return &rpm_mac_ops; +} + +static void rpm_write(rpm_t *rpm, u64 lmac, u64 offset, u64 val) +{ + cgx_write(rpm, lmac, offset, val); +} + +static u64 rpm_read(rpm_t *rpm, u64 lmac, u64 offset) +{ + return cgx_read(rpm, lmac, offset); +} + +int rpm_get_nr_lmacs(void *rpmd) +{ + rpm_t *rpm = rpmd; + + return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL); +} + +void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!rpm) + return; + + if (enable) { + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + } else { + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + } +} + +int rpm_lmac_get_pause_frm_status(void *rpmd, int lmac_id, + u8 *tx_pause, u8 *rx_pause) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, 
lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + *rx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE); + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + *tx_pause = !(cfg & RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE); + return 0; +} + +int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, + u8 rx_pause) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + cfg |= rx_pause ? 0x0 : RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + cfg |= rx_pause ? 0x0 : RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + cfg |= tx_pause ? 0x0 : RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); + if (tx_pause) { + cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); + } else { + cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); + cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); + } + rpm_write(rpm, 0, RPMX_CMR_RX_OVR_BP, cfg); + return 0; +} + +void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (enable) { + /* Enable 802.3 pause frame mode */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Enable receive pause frames */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Enable forward pause to TX block */ + cfg = rpm_read(rpm, lmac_id, 
RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Enable pause frames transmission */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg &= ~RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Set pause time and interval */ + cfg = rpm_read(rpm, lmac_id, + RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA); + cfg &= ~0xFFFFULL; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA, + cfg | RPM_DEFAULT_PAUSE_TIME); + /* Set pause interval as the hardware default is too short */ + cfg = rpm_read(rpm, lmac_id, + RPMX_MTI_MAC100X_CL01_QUANTA_THRESH); + cfg &= ~0xFFFFULL; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_CL01_QUANTA_THRESH, + cfg | (RPM_DEFAULT_PAUSE_TIME / 2)); + + } else { + /* ALL pause frames received are completely ignored */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Disable forward pause to TX block */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + + /* Disable pause frames transmission */ + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + } +} + +int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) +{ + rpm_t *rpm = rpmd; + u64 val_lo, val_hi; + + if (!rpm || lmac_id >= rpm->lmac_count) + return -ENODEV; + + mutex_lock(&rpm->lock); + + /* Update idx to point per lmac Rx statistics page */ + idx += lmac_id * rpm->mac_ops->rx_stats_cnt; + + /* Read lower 32 bits of counter */ + val_lo = rpm_read(rpm, 0, 
RPMX_MTI_STAT_RX_STAT_PAGES_COUNTERX + + (idx * 8)); + + /* upon read of lower 32 bits, higher 32 bits are written + * to RPMX_MTI_STAT_DATA_HI_CDC + */ + val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + + *rx_stat = (val_hi << 32 | val_lo); + + mutex_unlock(&rpm->lock); + return 0; +} + +int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat) +{ + rpm_t *rpm = rpmd; + u64 val_lo, val_hi; + + if (!rpm || lmac_id >= rpm->lmac_count) + return -ENODEV; + + mutex_lock(&rpm->lock); + + /* Update idx to point per lmac Tx statistics page */ + idx += lmac_id * rpm->mac_ops->tx_stats_cnt; + + val_lo = rpm_read(rpm, 0, RPMX_MTI_STAT_TX_STAT_PAGES_COUNTERX + + (idx * 8)); + val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + + *tx_stat = (val_hi << 32 | val_lo); + + mutex_unlock(&rpm->lock); + return 0; +} + +u8 rpm_get_lmac_type(void *rpmd, int lmac_id) +{ + rpm_t *rpm = rpmd; + u64 req = 0, resp; + int err; + + req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_LINK_STS, req); + err = cgx_fwi_cmd_generic(req, &resp, rpm, 0); + if (!err) + return FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, resp); + return err; +} + +int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u8 lmac_type; + u64 cfg; + + if (!rpm || lmac_id >= rpm->lmac_count) + return -ENODEV; + lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); + if (lmac_type == LMAC_MODE_100G_R) { + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); + + if (enable) + cfg |= RPMX_MTI_PCS_LBK; + else + cfg &= ~RPMX_MTI_PCS_LBK; + rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg); + } else { + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1); + if (enable) + cfg |= RPMX_MTI_PCS_LBK; + else + cfg &= ~RPMX_MTI_PCS_LBK; + rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg); + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h new file mode 100644 index 000000000000..d32e74bd5964 --- 
/dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell OcteonTx2 RPM driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef RPM_H +#define RPM_H + +#include <linux/bits.h> + +/* PCI device IDs */ +#define PCI_DEVID_CN10K_RPM 0xA060 + +/* Registers */ +#define RPMX_CMRX_SW_INT 0x180 +#define RPMX_CMRX_SW_INT_W1S 0x188 +#define RPMX_CMRX_SW_INT_ENA_W1S 0x198 +#define RPMX_CMRX_LINK_CFG 0x1070 +#define RPMX_MTI_PCS100X_CONTROL1 0x20000 +#define RPMX_MTI_LPCSX_CONTROL1 0x30000 +#define RPMX_MTI_PCS_LBK BIT_ULL(14) +#define RPMX_MTI_LPCSX_CONTROL(id) (0x30000 | ((id) * 0x100)) + +#define RPMX_CMRX_LINK_RANGE_MASK GENMASK_ULL(19, 16) +#define RPMX_CMRX_LINK_BASE_MASK GENMASK_ULL(11, 0) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG 0x8010 +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_RX_P_DISABLE BIT_ULL(29) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE BIT_ULL(28) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PAUSE_IGNORE BIT_ULL(8) +#define RPMX_MTI_MAC100X_COMMAND_CONFIG_PFC_MODE BIT_ULL(19) +#define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 +#define RPMX_MTI_MAC100X_CL01_QUANTA_THRESH 0x80C8 +#define RPM_DEFAULT_PAUSE_TIME 0xFFFF +#define RPMX_CMR_RX_OVR_BP 0x4120 +#define RPMX_CMR_RX_OVR_BP_EN(x) BIT_ULL((x) + 8) +#define RPMX_CMR_RX_OVR_BP_BP(x) BIT_ULL((x) + 4) +#define RPMX_MTI_STAT_RX_STAT_PAGES_COUNTERX 0x12000 +#define RPMX_MTI_STAT_TX_STAT_PAGES_COUNTERX 0x13000 +#define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 + +#define RPM_LMAC_FWI 0xa + +/* Function Declarations */ +int rpm_get_nr_lmacs(void *rpmd); +u8 rpm_get_lmac_type(void *rpmd, int lmac_id); +int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable); +void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_get_pause_frm_status(void *cgxd, int lmac_id, u8 *tx_pause, + u8 *rx_pause); +void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_enadis_pause_frm(void 
*rpmd, int lmac_id, u8 tx_pause, + u8 rx_pause); +int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat); +int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); +#endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 50c2a1d800f4..d9a1a71c7ccc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -22,7 +22,7 @@ #include "rvu_trace.h" -#define DRV_NAME "octeontx2-af" +#define DRV_NAME "rvu_af" #define DRV_STRING "Marvell OcteonTX2 RVU Admin Function Driver" static int rvu_get_hwvf(struct rvu *rvu, int pcifunc); @@ -78,6 +78,9 @@ static void rvu_setup_hw_capabilities(struct rvu *rvu) if (is_rvu_96xx_A0(rvu)) hw->cap.nix_rx_multicast = false; } + + if (!is_rvu_otx2(rvu)) + hw->cap.per_pf_mbox_regs = true; } /* Poll a RVU block's register 'offset', for a 'zero' @@ -852,6 +855,31 @@ static int rvu_setup_cpt_hw_resource(struct rvu *rvu, int blkaddr) return rvu_alloc_bitmap(&block->lf); } +static void rvu_get_lbk_bufsize(struct rvu *rvu) +{ + struct pci_dev *pdev = NULL; + void __iomem *base; + u64 lbk_const; + + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_LBK, pdev); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto err_put; + + lbk_const = readq(base + LBK_CONST); + + /* cache fifo size */ + rvu->hw->lbk_bufsize = FIELD_GET(LBK_CONST_BUF_SIZE, lbk_const); + + iounmap(base); +err_put: + pci_dev_put(pdev); +} + static int rvu_setup_hw_resources(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; @@ -1003,6 +1031,10 @@ cpt: rvu_scan_block(rvu, block); } + err = rvu_set_channels_base(rvu); + if (err) + goto msix_err; + err = rvu_npc_init(rvu); if (err) goto npc_err; @@ -1018,10 +1050,14 @@ cpt: if (err) goto npa_err; + rvu_get_lbk_bufsize(rvu); + err = rvu_nix_init(rvu); if (err) goto nix_err; + rvu_program_channels(rvu); + return 0; nix_err: @@ 
-1936,41 +1972,105 @@ static inline void rvu_afvf_mbox_up_handler(struct work_struct *work) __rvu_mbox_up_handler(mwork, TYPE_AFVF); } +static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr, + int num, int type) +{ + struct rvu_hwinfo *hw = rvu->hw; + int region; + u64 bar4; + + /* For cn10k platform VF mailbox regions of a PF follows after the + * PF <-> AF mailbox region. Whereas for Octeontx2 it is read from + * RVU_PF_VF_BAR4_ADDR register. + */ + if (type == TYPE_AFVF) { + for (region = 0; region < num; region++) { + if (hw->cap.per_pf_mbox_regs) { + bar4 = rvu_read64(rvu, BLKADDR_RVUM, + RVU_AF_PFX_BAR4_ADDR(0)) + + MBOX_SIZE; + bar4 += region * MBOX_SIZE; + } else { + bar4 = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR); + bar4 += region * MBOX_SIZE; + } + mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE); + if (!mbox_addr[region]) + goto error; + } + return 0; + } + + /* For cn10k platform AF <-> PF mailbox region of a PF is read from per + * PF registers. Whereas for Octeontx2 it is read from + * RVU_AF_PF_BAR4_ADDR register. 
+ */ + for (region = 0; region < num; region++) { + if (hw->cap.per_pf_mbox_regs) { + bar4 = rvu_read64(rvu, BLKADDR_RVUM, + RVU_AF_PFX_BAR4_ADDR(region)); + } else { + bar4 = rvu_read64(rvu, BLKADDR_RVUM, + RVU_AF_PF_BAR4_ADDR); + bar4 += region * MBOX_SIZE; + } + mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE); + if (!mbox_addr[region]) + goto error; + } + return 0; + +error: + while (region--) + iounmap((void __iomem *)mbox_addr[region]); + return -ENOMEM; +} + static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, int type, int num, void (mbox_handler)(struct work_struct *), void (mbox_up_handler)(struct work_struct *)) { - void __iomem *hwbase = NULL, *reg_base; - int err, i, dir, dir_up; + int err = -EINVAL, i, dir, dir_up; + void __iomem *reg_base; struct rvu_work *mwork; + void **mbox_regions; const char *name; - u64 bar4_addr; + + mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL); + if (!mbox_regions) + return -ENOMEM; switch (type) { case TYPE_AFPF: name = "rvu_afpf_mailbox"; - bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR); dir = MBOX_DIR_AFPF; dir_up = MBOX_DIR_AFPF_UP; reg_base = rvu->afreg_base; + err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFPF); + if (err) + goto free_regions; break; case TYPE_AFVF: name = "rvu_afvf_mailbox"; - bar4_addr = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR); dir = MBOX_DIR_PFVF; dir_up = MBOX_DIR_PFVF_UP; reg_base = rvu->pfreg_base; + err = rvu_get_mbox_regions(rvu, mbox_regions, num, TYPE_AFVF); + if (err) + goto free_regions; break; default: - return -EINVAL; + return err; } mw->mbox_wq = alloc_workqueue(name, WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, num); - if (!mw->mbox_wq) - return -ENOMEM; + if (!mw->mbox_wq) { + err = -ENOMEM; + goto unmap_regions; + } mw->mbox_wrk = devm_kcalloc(rvu->dev, num, sizeof(struct rvu_work), GFP_KERNEL); @@ -1986,23 +2086,13 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, goto exit; } - /* Mailbox is a reserved memory 
(in RAM) region shared between - * RVU devices, shouldn't be mapped as device memory to allow - * unaligned accesses. - */ - hwbase = ioremap_wc(bar4_addr, MBOX_SIZE * num); - if (!hwbase) { - dev_err(rvu->dev, "Unable to map mailbox region\n"); - err = -ENOMEM; - goto exit; - } - - err = otx2_mbox_init(&mw->mbox, hwbase, rvu->pdev, reg_base, dir, num); + err = otx2_mbox_regions_init(&mw->mbox, mbox_regions, rvu->pdev, + reg_base, dir, num); if (err) goto exit; - err = otx2_mbox_init(&mw->mbox_up, hwbase, rvu->pdev, - reg_base, dir_up, num); + err = otx2_mbox_regions_init(&mw->mbox_up, mbox_regions, rvu->pdev, + reg_base, dir_up, num); if (err) goto exit; @@ -2015,25 +2105,36 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw, mwork->rvu = rvu; INIT_WORK(&mwork->work, mbox_up_handler); } - + kfree(mbox_regions); return 0; + exit: - if (hwbase) - iounmap((void __iomem *)hwbase); destroy_workqueue(mw->mbox_wq); +unmap_regions: + while (num--) + iounmap((void __iomem *)mbox_regions[num]); +free_regions: + kfree(mbox_regions); return err; } static void rvu_mbox_destroy(struct mbox_wq_info *mw) { + struct otx2_mbox *mbox = &mw->mbox; + struct otx2_mbox_dev *mdev; + int devid; + if (mw->mbox_wq) { flush_workqueue(mw->mbox_wq); destroy_workqueue(mw->mbox_wq); mw->mbox_wq = NULL; } - if (mw->mbox.hwbase) - iounmap((void __iomem *)mw->mbox.hwbase); + for (devid = 0; devid < mbox->ndevs; devid++) { + mdev = &mbox->dev[devid]; + if (mdev->hwbase) + iounmap((void __iomem *)mdev->hwbase); + } otx2_mbox_destroy(&mw->mbox); otx2_mbox_destroy(&mw->mbox_up); @@ -2653,8 +2754,6 @@ static void rvu_enable_afvf_intr(struct rvu *rvu) rvupf_write64(rvu, RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(vfs - 64)); } -#define PCI_DEVID_OCTEONTX2_LBK 0xA061 - int rvu_get_num_lbk_chans(void) { struct pci_dev *pdev; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index ce931d86600b..fa6e46e36ae4 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -19,12 +19,15 @@ #include "common.h" #include "mbox.h" #include "npc.h" +#include "rvu_reg.h" /* PCI device IDs */ #define PCI_DEVID_OCTEONTX2_RVU_AF 0xA065 +#define PCI_DEVID_OCTEONTX2_LBK 0xA061 /* Subsystem Device ID */ #define PCI_SUBSYS_DEVID_96XX 0xB200 +#define PCI_SUBSYS_DEVID_CN10K_A 0xB900 /* PCI BAR nos */ #define PCI_AF_REG_BAR_NUM 0 @@ -303,6 +306,8 @@ struct hw_cap { bool nix_shaping; /* Is shaping and coloring supported */ bool nix_tx_link_bp; /* Can link backpressure TL queues ? */ bool nix_rx_multicast; /* Rx packet replication support */ + bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */ + bool programmable_chans; /* Channels programmable ? */ }; struct rvu_hwinfo { @@ -311,14 +316,20 @@ struct rvu_hwinfo { u16 max_vfs_per_pf; /* Max VFs that can be attached to a PF */ u8 cgx; u8 lmac_per_cgx; + u16 cgx_chan_base; /* CGX base channel number */ + u16 lbk_chan_base; /* LBK base channel number */ + u16 sdp_chan_base; /* SDP base channel number */ + u16 cpt_chan_base; /* CPT base channel number */ u8 cgx_links; u8 lbk_links; u8 sdp_links; + u8 cpt_links; /* Number of CPT links */ u8 npc_kpus; /* No of parser units */ u8 npc_pkinds; /* No of port kinds */ u8 npc_intfs; /* No of interfaces */ u8 npc_kpu_entries; /* No of KPU entries */ u16 npc_counters; /* No of match stats counters */ + u32 lbk_bufsize; /* FIFO size supported by LBK */ bool npc_ext_set; /* Extended register set */ struct hw_cap cap; @@ -357,6 +368,10 @@ struct rvu_fwdata { u64 msixtr_base; #define FWDATA_RESERVED_MEM 1023 u64 reserved[FWDATA_RESERVED_MEM]; +#define CGX_MAX 5 +#define CGX_LMACS_MAX 4 + struct cgx_lmac_fwdata_s cgx_fw_data[CGX_MAX][CGX_LMACS_MAX]; + /* Do not add new fields below this line */ }; struct ptp; @@ -472,6 +487,59 @@ static inline bool is_rvu_96xx_B0(struct rvu *rvu) (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX); } +/* REVID for 
PCIe devices. + * Bits 0..1: minor pass, bit 3..2: major pass + * bits 7..4: midr id + */ +#define PCI_REVISION_ID_96XX 0x00 +#define PCI_REVISION_ID_95XX 0x10 +#define PCI_REVISION_ID_LOKI 0x20 +#define PCI_REVISION_ID_98XX 0x30 +#define PCI_REVISION_ID_95XXMM 0x40 + +static inline bool is_rvu_otx2(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + u8 midr = pdev->revision & 0xF0; + + return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX || + midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX || + midr == PCI_REVISION_ID_95XXMM); +} + +static inline u16 rvu_nix_chan_cgx(struct rvu *rvu, u8 cgxid, + u8 lmacid, u8 chan) +{ + u64 nix_const = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_CONST); + u16 cgx_chans = nix_const & 0xFFULL; + struct rvu_hwinfo *hw = rvu->hw; + + if (!hw->cap.programmable_chans) + return NIX_CHAN_CGX_LMAC_CHX(cgxid, lmacid, chan); + + return rvu->hw->cgx_chan_base + + (cgxid * hw->lmac_per_cgx + lmacid) * cgx_chans + chan; +} + +static inline u16 rvu_nix_chan_lbk(struct rvu *rvu, u8 lbkid, + u8 chan) +{ + u64 nix_const = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_CONST); + u16 lbk_chans = (nix_const >> 16) & 0xFFULL; + struct rvu_hwinfo *hw = rvu->hw; + + if (!hw->cap.programmable_chans) + return NIX_CHAN_LBK_CHX(lbkid, chan); + + return rvu->hw->lbk_chan_base + lbkid * lbk_chans + chan; +} + +static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan) +{ + return rvu->hw->cpt_chan_base + chan; +} + /* Function Prototypes * RVU */ @@ -608,9 +676,16 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 src, struct mcam_entry *entry, u8 *intf, u8 *ena); +bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); +u32 rvu_cgx_get_fifolen(struct rvu *rvu); + /* CPT APIs */ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); +/* CN10K RVU */ +int rvu_set_channels_base(struct rvu *rvu); +void 
rvu_program_channels(struct rvu *rvu); + #ifdef CONFIG_DEBUG_FS void rvu_dbg_init(struct rvu *rvu); void rvu_dbg_exit(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6c6b411e78fd..3a1809c28e83 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -14,6 +14,7 @@ #include "rvu.h" #include "cgx.h" +#include "lmac_common.h" #include "rvu_reg.h" #include "rvu_trace.h" @@ -42,6 +43,20 @@ static struct _req_type __maybe_unused \ MBOX_UP_CGX_MESSAGES #undef M +bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature) +{ + u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_pf_cgxmapped(rvu, pf)) + return 0; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + + return (cgx_features_get(cgxd) & feature); +} + /* Returns bitmap of mapped PFs */ static u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) { @@ -92,9 +107,10 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) { struct npc_pkind *pkind = &rvu->hw->pkind; int cgx_cnt_max = rvu->cgx_cnt_max; - int cgx, lmac_cnt, lmac; int pf = PF_CGXMAP_BASE; + unsigned long lmac_bmap; int size, free_pkind; + int cgx, lmac, iter; if (!cgx_cnt_max) return 0; @@ -125,14 +141,17 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) for (cgx = 0; cgx < cgx_cnt_max; cgx++) { if (!rvu_cgx_pdata(cgx, rvu)) continue; - lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu)); - for (lmac = 0; lmac < lmac_cnt; lmac++, pf++) { + lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); + for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) { + lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), + iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); rvu->cgxlmac2pf_map[CGX_OFFSET(cgx) + lmac] = 1 << pf; free_pkind = rvu_alloc_rsrc(&pkind->rsrc); pkind->pfchan_map[free_pkind] = ((pf) & 0x3F) << 16; 
rvu_map_cgx_nix_block(rvu, pf, cgx, lmac); rvu->cgx_mapped_pfs++; + pf++; } } return 0; @@ -154,8 +173,10 @@ static int rvu_cgx_send_link_info(int cgx_id, int lmac_id, struct rvu *rvu) &qentry->link_event.link_uinfo); qentry->link_event.cgx_id = cgx_id; qentry->link_event.lmac_id = lmac_id; - if (err) + if (err) { + kfree(qentry); goto skip_add; + } list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head); skip_add: spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags); @@ -251,6 +272,7 @@ static void cgx_evhandler_task(struct work_struct *work) static int cgx_lmac_event_handler_init(struct rvu *rvu) { + unsigned long lmac_bmap; struct cgx_event_cb cb; int cgx, lmac, err; void *cgxd; @@ -271,7 +293,8 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu) cgxd = rvu_cgx_pdata(cgx, rvu); if (!cgxd) continue; - for (lmac = 0; lmac < cgx_get_lmac_cnt(cgxd); lmac++) { + lmac_bmap = cgx_get_lmac_bmap(cgxd); + for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) { err = cgx_lmac_evh_register(&cb, cgxd, lmac); if (err) dev_err(rvu->dev, @@ -349,6 +372,7 @@ int rvu_cgx_init(struct rvu *rvu) int rvu_cgx_exit(struct rvu *rvu) { + unsigned long lmac_bmap; int cgx, lmac; void *cgxd; @@ -356,7 +380,8 @@ int rvu_cgx_exit(struct rvu *rvu) cgxd = rvu_cgx_pdata(cgx, rvu); if (!cgxd) continue; - for (lmac = 0; lmac < cgx_get_lmac_cnt(cgxd); lmac++) + lmac_bmap = cgx_get_lmac_bmap(cgxd); + for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) cgx_lmac_evh_unregister(cgxd, lmac); } @@ -381,6 +406,7 @@ static bool is_cgx_config_permitted(struct rvu *rvu, u16 pcifunc) void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable) { + struct mac_ops *mac_ops; u8 cgx_id, lmac_id; void *cgxd; @@ -390,11 +416,12 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable) rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); /* Set / clear CTL_BCK to control pause frame forwarding to NIX */ if (enable) - 
cgx_lmac_enadis_rx_pause_fwding(cgxd, lmac_id, true); + mac_ops->mac_enadis_rx_pause_fwding(cgxd, lmac_id, true); else - cgx_lmac_enadis_rx_pause_fwding(cgxd, lmac_id, false); + mac_ops->mac_enadis_rx_pause_fwding(cgxd, lmac_id, false); } int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) @@ -426,10 +453,11 @@ int rvu_mbox_handler_cgx_stop_rxtx(struct rvu *rvu, struct msg_req *req, return 0; } -int rvu_mbox_handler_cgx_stats(struct rvu *rvu, struct msg_req *req, - struct cgx_stats_rsp *rsp) +static int rvu_lmac_get_stats(struct rvu *rvu, struct msg_req *req, + void *rsp) { int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; int stat = 0, err = 0; u64 tx_stat, rx_stat; u8 cgx_idx, lmac; @@ -440,28 +468,63 @@ int rvu_mbox_handler_cgx_stats(struct rvu *rvu, struct msg_req *req, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); cgxd = rvu_cgx_pdata(cgx_idx, rvu); + mac_ops = get_mac_ops(cgxd); /* Rx stats */ - while (stat < CGX_RX_STATS_COUNT) { - err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat); + while (stat < mac_ops->rx_stats_cnt) { + err = mac_ops->mac_get_rx_stats(cgxd, lmac, stat, &rx_stat); if (err) return err; - rsp->rx_stats[stat] = rx_stat; + if (mac_ops->rx_stats_cnt == RPM_RX_STATS_COUNT) + ((struct rpm_stats_rsp *)rsp)->rx_stats[stat] = rx_stat; + else + ((struct cgx_stats_rsp *)rsp)->rx_stats[stat] = rx_stat; stat++; } /* Tx stats */ stat = 0; - while (stat < CGX_TX_STATS_COUNT) { - err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat); + while (stat < mac_ops->tx_stats_cnt) { + err = mac_ops->mac_get_tx_stats(cgxd, lmac, stat, &tx_stat); if (err) return err; - rsp->tx_stats[stat] = tx_stat; + if (mac_ops->tx_stats_cnt == RPM_TX_STATS_COUNT) + ((struct rpm_stats_rsp *)rsp)->tx_stats[stat] = tx_stat; + else + ((struct cgx_stats_rsp *)rsp)->tx_stats[stat] = tx_stat; stat++; } return 0; } +int rvu_mbox_handler_cgx_stats(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp) +{ + return 
rvu_lmac_get_stats(rvu, req, (void *)rsp); +} + +int rvu_mbox_handler_rpm_stats(struct rvu *rvu, struct msg_req *req, + struct rpm_stats_rsp *rsp) +{ + return rvu_lmac_get_stats(rvu, req, (void *)rsp); +} + +int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu, + struct msg_req *req, + struct cgx_fec_stats_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_idx, lmac; + void *cgxd; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + return cgx_get_fec_stats(cgxd, lmac, rsp); +} + int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu, struct cgx_mac_addr_set_or_get *req, struct cgx_mac_addr_set_or_get *rsp) @@ -538,6 +601,9 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) u8 cgx_id, lmac_id; void *cgxd; + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_PTP)) + return 0; + /* This msg is expected only from PFs that are mapped to CGX LMACs, * if received from other PF/VF simply ACK, nothing to do. */ @@ -624,17 +690,47 @@ int rvu_mbox_handler_cgx_get_linkinfo(struct rvu *rvu, struct msg_req *req, return err; } +int rvu_mbox_handler_cgx_features_get(struct rvu *rvu, + struct msg_req *req, + struct cgx_features_info_msg *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_idx, lmac; + void *cgxd; + + if (!is_pf_cgxmapped(rvu, pf)) + return 0; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + rsp->lmac_features = cgx_features_get(cgxd); + + return 0; +} + +u32 rvu_cgx_get_fifolen(struct rvu *rvu) +{ + struct mac_ops *mac_ops; + int rvu_def_cgx_id = 0; + u32 fifo_len; + + mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + fifo_len = mac_ops ? 
mac_ops->fifo_len : 0; + + return fifo_len; +} + static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en) { - int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; u8 cgx_id, lmac_id; if (!is_cgx_config_permitted(rvu, pcifunc)) return -EPERM; - rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu)); - return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu), + return mac_ops->mac_lmac_intl_lbk(rvu_cgx_pdata(cgx_id, rvu), lmac_id, en); } @@ -657,7 +753,12 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, struct cgx_pause_frm_cfg *rsp) { int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + void *cgxd; + + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_FC)) + return 0; /* This msg is expected only from PF/VFs that are mapped to CGX LMACs, * if received from other PF/VF simply ACK, nothing to do. @@ -666,16 +767,32 @@ int rvu_mbox_handler_cgx_cfg_pause_frm(struct rvu *rvu, return -ENODEV; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); if (req->set) - cgx_lmac_set_pause_frm(rvu_cgx_pdata(cgx_id, rvu), lmac_id, - req->tx_pause, req->rx_pause); + mac_ops->mac_enadis_pause_frm(cgxd, lmac_id, + req->tx_pause, req->rx_pause); else - cgx_lmac_get_pause_frm(rvu_cgx_pdata(cgx_id, rvu), lmac_id, - &rsp->tx_pause, &rsp->rx_pause); + mac_ops->mac_get_pause_frm_status(cgxd, lmac_id, + &rsp->tx_pause, + &rsp->rx_pause); return 0; } +int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!is_pf_cgxmapped(rvu, pf)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + return cgx_get_phy_fec_stats(rvu_cgx_pdata(cgx_id, rvu), lmac_id); +} + /* Finds cumulative status of NIX rx/tx counters from LF of a PF and those * 
from its VFs as well. ie. NIX rx/tx counters at the CGX port level */ @@ -767,3 +884,56 @@ exit: mutex_unlock(&rvu->cgx_cfg_lock); return err; } + +int rvu_mbox_handler_cgx_set_fec_param(struct rvu *rvu, + struct fec_mode *req, + struct fec_mode *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!is_pf_cgxmapped(rvu, pf)) + return -EPERM; + + if (req->fec == OTX2_FEC_OFF) + req->fec = OTX2_FEC_NONE; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + rsp->fec = cgx_set_fec(req->fec, cgx_id, lmac_id); + return 0; +} + +int rvu_mbox_handler_cgx_get_aux_link_info(struct rvu *rvu, struct msg_req *req, + struct cgx_fw_data *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + if (!rvu->fwdata) + return -ENXIO; + + if (!is_pf_cgxmapped(rvu, pf)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + memcpy(&rsp->fwdata, &rvu->fwdata->cgx_fw_data[cgx_id][lmac_id], + sizeof(struct cgx_lmac_fwdata_s)); + return 0; +} + +int rvu_mbox_handler_cgx_set_link_mode(struct rvu *rvu, + struct cgx_set_link_mode_req *req, + struct cgx_set_link_mode_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_idx, lmac; + void *cgxd; + + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + rsp->status = cgx_set_link_mode(cgxd, req->args, cgx_idx, lmac); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c new file mode 100644 index 000000000000..7d9e71c6965f --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RPM CN10K driver + * + * Copyright (C) 2020 Marvell. 
+ */ + +#include <linux/bitfield.h> +#include <linux/pci.h> +#include "rvu.h" +#include "cgx.h" +#include "rvu_reg.h" + +int rvu_set_channels_base(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 cpt_chan_base; + u64 nix_const; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST); + + hw->cgx = (nix_const >> 12) & 0xFULL; + hw->lmac_per_cgx = (nix_const >> 8) & 0xFULL; + hw->cgx_links = hw->cgx * hw->lmac_per_cgx; + hw->lbk_links = (nix_const >> 24) & 0xFULL; + hw->cpt_links = (nix_const >> 44) & 0xFULL; + hw->sdp_links = 1; + + hw->cgx_chan_base = NIX_CHAN_CGX_LMAC_CHX(0, 0, 0); + hw->lbk_chan_base = NIX_CHAN_LBK_CHX(0, 0); + hw->sdp_chan_base = NIX_CHAN_SDP_CH_START; + + /* No Programmable channels */ + if (!(nix_const & BIT_ULL(60))) + return 0; + + hw->cap.programmable_chans = true; + + /* If programmable channels are present then configure + * channels such that all channel numbers are contiguous + * leaving no holes. This way the new CPT channels can be + * accomodated. The order of channel numbers assigned is + * LBK, SDP, CGX and CPT. 
+ */ + hw->sdp_chan_base = hw->lbk_chan_base + hw->lbk_links * + ((nix_const >> 16) & 0xFFULL); + hw->cgx_chan_base = hw->sdp_chan_base + hw->sdp_links * SDP_CHANNELS; + + cpt_chan_base = hw->cgx_chan_base + hw->cgx_links * + (nix_const & 0xFFULL); + + /* Out of 4096 channels start CPT from 2048 so + * that MSB for CPT channels is always set + */ + if (cpt_chan_base <= 0x800) { + hw->cpt_chan_base = 0x800; + } else { + dev_err(rvu->dev, + "CPT channels could not fit in the range 2048-4095\n"); + return -EINVAL; + } + + return 0; +} + +#define LBK_CONNECT_NIXX(a) (0x0 + (a)) + +static void __rvu_lbk_set_chans(struct rvu *rvu, void __iomem *base, + u64 offset, int lbkid, u16 chans) +{ + struct rvu_hwinfo *hw = rvu->hw; + u64 cfg; + + cfg = readq(base + offset); + cfg &= ~(LBK_LINK_CFG_RANGE_MASK | + LBK_LINK_CFG_ID_MASK | LBK_LINK_CFG_BASE_MASK); + cfg |= FIELD_PREP(LBK_LINK_CFG_RANGE_MASK, ilog2(chans)); + cfg |= FIELD_PREP(LBK_LINK_CFG_ID_MASK, lbkid); + cfg |= FIELD_PREP(LBK_LINK_CFG_BASE_MASK, hw->lbk_chan_base); + + writeq(cfg, base + offset); +} + +static void rvu_lbk_set_channels(struct rvu *rvu) +{ + struct pci_dev *pdev = NULL; + void __iomem *base; + u64 lbk_const; + u8 src, dst; + u16 chans; + + /* To loopback packets between multiple NIX blocks + * mutliple LBK blocks are needed. 
With two NIX blocks, + * four LBK blocks are needed and each LBK block + * source and destination are as follows: + * LBK0 - source NIX0 and destination NIX1 + * LBK1 - source NIX0 and destination NIX1 + * LBK2 - source NIX1 and destination NIX0 + * LBK3 - source NIX1 and destination NIX1 + * As per the HRM channel numbers should be programmed as: + * P2X and X2P of LBK0 as same + * P2X and X2P of LBK3 as same + * P2X of LBK1 and X2P of LBK2 as same + * P2X of LBK2 and X2P of LBK1 as same + */ + while (true) { + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_LBK, pdev); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto err_put; + + lbk_const = readq(base + LBK_CONST); + chans = FIELD_GET(LBK_CONST_CHANS, lbk_const); + dst = FIELD_GET(LBK_CONST_DST, lbk_const); + src = FIELD_GET(LBK_CONST_SRC, lbk_const); + + if (src == dst) { + if (src == LBK_CONNECT_NIXX(0)) { /* LBK0 */ + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_X2P, + 0, chans); + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_P2X, + 0, chans); + } else if (src == LBK_CONNECT_NIXX(1)) { /* LBK3 */ + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_X2P, + 1, chans); + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_P2X, + 1, chans); + } + } else { + if (src == LBK_CONNECT_NIXX(0)) { /* LBK1 */ + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_X2P, + 0, chans); + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_P2X, + 1, chans); + } else if (src == LBK_CONNECT_NIXX(1)) { /* LBK2 */ + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_X2P, + 1, chans); + __rvu_lbk_set_chans(rvu, base, LBK_LINK_CFG_P2X, + 0, chans); + } + } + iounmap(base); + } +err_put: + pci_dev_put(pdev); +} + +static void __rvu_nix_set_channels(struct rvu *rvu, int blkaddr) +{ + u64 nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST); + u16 cgx_chans, lbk_chans, sdp_chans, cpt_chans; + struct rvu_hwinfo *hw = rvu->hw; + int link, nix_link = 0; + u16 start; + u64 cfg; + + cgx_chans = nix_const & 0xFFULL; + lbk_chans = 
(nix_const >> 16) & 0xFFULL; + sdp_chans = SDP_CHANNELS; + cpt_chans = (nix_const >> 32) & 0xFFFULL; + + start = hw->cgx_chan_base; + for (link = 0; link < hw->cgx_links; link++, nix_link++) { + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link)); + cfg &= ~(NIX_AF_LINKX_BASE_MASK | NIX_AF_LINKX_RANGE_MASK); + cfg |= FIELD_PREP(NIX_AF_LINKX_RANGE_MASK, ilog2(cgx_chans)); + cfg |= FIELD_PREP(NIX_AF_LINKX_BASE_MASK, start); + rvu_write64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link), cfg); + start += cgx_chans; + } + + start = hw->lbk_chan_base; + for (link = 0; link < hw->lbk_links; link++, nix_link++) { + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link)); + cfg &= ~(NIX_AF_LINKX_BASE_MASK | NIX_AF_LINKX_RANGE_MASK); + cfg |= FIELD_PREP(NIX_AF_LINKX_RANGE_MASK, ilog2(lbk_chans)); + cfg |= FIELD_PREP(NIX_AF_LINKX_BASE_MASK, start); + rvu_write64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link), cfg); + start += lbk_chans; + } + + start = hw->sdp_chan_base; + for (link = 0; link < hw->sdp_links; link++, nix_link++) { + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link)); + cfg &= ~(NIX_AF_LINKX_BASE_MASK | NIX_AF_LINKX_RANGE_MASK); + cfg |= FIELD_PREP(NIX_AF_LINKX_RANGE_MASK, ilog2(sdp_chans)); + cfg |= FIELD_PREP(NIX_AF_LINKX_BASE_MASK, start); + rvu_write64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link), cfg); + start += sdp_chans; + } + + start = hw->cpt_chan_base; + for (link = 0; link < hw->cpt_links; link++, nix_link++) { + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link)); + cfg &= ~(NIX_AF_LINKX_BASE_MASK | NIX_AF_LINKX_RANGE_MASK); + cfg |= FIELD_PREP(NIX_AF_LINKX_RANGE_MASK, ilog2(cpt_chans)); + cfg |= FIELD_PREP(NIX_AF_LINKX_BASE_MASK, start); + rvu_write64(rvu, blkaddr, NIX_AF_LINKX_CFG(nix_link), cfg); + start += cpt_chans; + } +} + +static void rvu_nix_set_channels(struct rvu *rvu) +{ + int blkaddr = 0; + + blkaddr = rvu_get_next_nix_blkaddr(rvu, blkaddr); + while (blkaddr) { + __rvu_nix_set_channels(rvu, blkaddr); + blkaddr = 
rvu_get_next_nix_blkaddr(rvu, blkaddr); + } +} + +static void __rvu_rpm_set_channels(int cgxid, int lmacid, u16 base) +{ + u64 cfg; + + cfg = cgx_lmac_read(cgxid, lmacid, RPMX_CMRX_LINK_CFG); + cfg &= ~(RPMX_CMRX_LINK_BASE_MASK | RPMX_CMRX_LINK_RANGE_MASK); + + /* There is no read-only constant register to read + * the number of channels for LMAC and it is always 16. + */ + cfg |= FIELD_PREP(RPMX_CMRX_LINK_RANGE_MASK, ilog2(16)); + cfg |= FIELD_PREP(RPMX_CMRX_LINK_BASE_MASK, base); + cgx_lmac_write(cgxid, lmacid, RPMX_CMRX_LINK_CFG, cfg); +} + +static void rvu_rpm_set_channels(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 base = hw->cgx_chan_base; + int cgx, lmac; + + for (cgx = 0; cgx < rvu->cgx_cnt_max; cgx++) { + for (lmac = 0; lmac < hw->lmac_per_cgx; lmac++) { + __rvu_rpm_set_channels(cgx, lmac, base); + base += 16; + } + } +} + +void rvu_program_channels(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + + if (!hw->cap.programmable_chans) + return; + + rvu_nix_set_channels(rvu); + rvu_lbk_set_channels(rvu); + rvu_rpm_set_channels(rvu); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 80e964330de3..dfeea587a27e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -19,6 +19,7 @@ #include "rvu_reg.h" #include "rvu.h" #include "cgx.h" +#include "lmac_common.h" #include "npc.h" #define DEBUGFS_DIR_NAME "octeontx2" @@ -109,6 +110,89 @@ static char *cgx_tx_stats_fields[] = { [CGX_STAT17] = "Control/PAUSE packets sent", }; +static char *rpm_rx_stats_fields[] = { + "Octets of received packets", + "Octets of received packets with out error", + "Received packets with alignment errors", + "Control/PAUSE packets received", + "Packets received with Frame too long Errors", + "Packets received with a1nrange length Errors", + "Received packets", + "Packets received with 
FrameCheckSequenceErrors", + "Packets received with VLAN header", + "Error packets", + "Packets recievd with unicast DMAC", + "Packets received with multicast DMAC", + "Packets received with broadcast DMAC", + "Dropped packets", + "Total frames received on interface", + "Packets received with an octet count < 64", + "Packets received with an octet count == 64", + "Packets received with an octet count of 65–127", + "Packets received with an octet count of 128-255", + "Packets received with an octet count of 256-511", + "Packets received with an octet count of 512-1023", + "Packets received with an octet count of 1024-1518", + "Packets received with an octet count of > 1518", + "Oversized Packets", + "Jabber Packets", + "Fragmented Packets", + "CBFC(class based flow control) pause frames received for class 0", + "CBFC pause frames received for class 1", + "CBFC pause frames received for class 2", + "CBFC pause frames received for class 3", + "CBFC pause frames received for class 4", + "CBFC pause frames received for class 5", + "CBFC pause frames received for class 6", + "CBFC pause frames received for class 7", + "CBFC pause frames received for class 8", + "CBFC pause frames received for class 9", + "CBFC pause frames received for class 10", + "CBFC pause frames received for class 11", + "CBFC pause frames received for class 12", + "CBFC pause frames received for class 13", + "CBFC pause frames received for class 14", + "CBFC pause frames received for class 15", + "MAC control packets received", +}; + +static char *rpm_tx_stats_fields[] = { + "Total octets sent on the interface", + "Total octets transmitted OK", + "Control/Pause frames sent", + "Total frames transmitted OK", + "Total frames sent with VLAN header", + "Error Packets", + "Packets sent to unicast DMAC", + "Packets sent to the multicast DMAC", + "Packets sent to a broadcast DMAC", + "Packets sent with an octet count == 64", + "Packets sent with an octet count of 65–127", + "Packets sent with an octet 
count of 128-255", + "Packets sent with an octet count of 256-511", + "Packets sent with an octet count of 512-1023", + "Packets sent with an octet count of 1024-1518", + "Packets sent with an octet count of > 1518", + "CBFC(class based flow control) pause frames transmitted for class 0", + "CBFC pause frames transmitted for class 1", + "CBFC pause frames transmitted for class 2", + "CBFC pause frames transmitted for class 3", + "CBFC pause frames transmitted for class 4", + "CBFC pause frames transmitted for class 5", + "CBFC pause frames transmitted for class 6", + "CBFC pause frames transmitted for class 7", + "CBFC pause frames transmitted for class 8", + "CBFC pause frames transmitted for class 9", + "CBFC pause frames transmitted for class 10", + "CBFC pause frames transmitted for class 11", + "CBFC pause frames transmitted for class 12", + "CBFC pause frames transmitted for class 13", + "CBFC pause frames transmitted for class 14", + "CBFC pause frames transmitted for class 15", + "MAC control packets sent", + "Total frames sent on the interface" +}; + enum cpt_eng_type { CPT_AE_TYPE = 1, CPT_SE_TYPE = 2, @@ -234,6 +318,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) { struct rvu *rvu = filp->private; struct pci_dev *pdev = NULL; + struct mac_ops *mac_ops; + int rvu_def_cgx_id = 0; char cgx[10], lmac[10]; struct rvu_pfvf *pfvf; int pf, domain, blkid; @@ -241,7 +327,9 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) u16 pcifunc; domain = 2; - seq_puts(filp, "PCI dev\t\tRVU PF Func\tNIX block\tCGX\tLMAC\n"); + mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n", + mac_ops->name); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { if (!is_pf_cgxmapped(rvu, pf)) continue; @@ -262,7 +350,7 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, 
&lmac_id); - sprintf(cgx, "CGX%d", cgx_id); + sprintf(cgx, "%s%d", mac_ops->name, cgx_id); sprintf(lmac, "LMAC%d", lmac_id); seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); @@ -449,6 +537,7 @@ RVU_DEBUG_SEQ_FOPS(npa_qsize, npa_qsize_display, npa_qsize_write); static void print_npa_aura_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) { struct npa_aura_s *aura = &rsp->aura; + struct rvu *rvu = m->private; seq_printf(m, "W0: Pool addr\t\t%llx\n", aura->pool_addr); @@ -468,6 +557,9 @@ static void print_npa_aura_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) seq_printf(m, "W3: limit\t\t%llu\nW3: bp\t\t\t%d\nW3: fc_ena\t\t%d\n", (u64)aura->limit, aura->bp, aura->fc_ena); + + if (!is_rvu_otx2(rvu)) + seq_printf(m, "W3: fc_be\t\t%d\n", aura->fc_be); seq_printf(m, "W3: fc_up_crossing\t%d\nW3: fc_stype\t\t%d\n", aura->fc_up_crossing, aura->fc_stype); seq_printf(m, "W3: fc_hyst_bits\t%d\n", aura->fc_hyst_bits); @@ -485,12 +577,15 @@ static void print_npa_aura_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) seq_printf(m, "W5: err_qint_idx \t%d\n", aura->err_qint_idx); seq_printf(m, "W6: thresh\t\t%llu\n", (u64)aura->thresh); + if (!is_rvu_otx2(rvu)) + seq_printf(m, "W6: fc_msh_dst\t\t%d\n", aura->fc_msh_dst); } /* Dumps given NPA Pool's context */ static void print_npa_pool_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) { struct npa_pool_s *pool = &rsp->pool; + struct rvu *rvu = m->private; seq_printf(m, "W0: Stack base\t\t%llx\n", pool->stack_base); @@ -512,6 +607,8 @@ static void print_npa_pool_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) pool->avg_con, pool->fc_ena, pool->fc_stype); seq_printf(m, "W4: fc_hyst_bits\t%d\nW4: fc_up_crossing\t%d\n", pool->fc_hyst_bits, pool->fc_up_crossing); + if (!is_rvu_otx2(rvu)) + seq_printf(m, "W4: fc_be\t\t%d\n", pool->fc_be); seq_printf(m, "W4: update_time\t\t%d\n", pool->update_time); seq_printf(m, "W5: fc_addr\t\t%llx\n", pool->fc_addr); @@ -525,8 +622,10 @@ 
static void print_npa_pool_ctx(struct seq_file *m, struct npa_aq_enq_rsp *rsp) seq_printf(m, "W8: thresh_int\t\t%d\n", pool->thresh_int); seq_printf(m, "W8: thresh_int_ena\t%d\nW8: thresh_up\t\t%d\n", pool->thresh_int_ena, pool->thresh_up); - seq_printf(m, "W8: thresh_qint_idx\t%d\nW8: err_qint_idx\t\t%d\n", + seq_printf(m, "W8: thresh_qint_idx\t%d\nW8: err_qint_idx\t%d\n", pool->thresh_qint_idx, pool->err_qint_idx); + if (!is_rvu_otx2(rvu)) + seq_printf(m, "W8: fc_msh_dst\t\t%d\n", pool->fc_msh_dst); } /* Reads aura/pool's ctx from admin queue */ @@ -910,11 +1009,78 @@ static int rvu_dbg_nix_ndc_tx_hits_miss_display(struct seq_file *filp, RVU_DEBUG_SEQ_FOPS(nix_ndc_tx_hits_miss, nix_ndc_tx_hits_miss_display, NULL); +static void print_nix_cn10k_sq_ctx(struct seq_file *m, + struct nix_cn10k_sq_ctx_s *sq_ctx) +{ + seq_printf(m, "W0: ena \t\t\t%d\nW0: qint_idx \t\t\t%d\n", + sq_ctx->ena, sq_ctx->qint_idx); + seq_printf(m, "W0: substream \t\t\t0x%03x\nW0: sdp_mcast \t\t\t%d\n", + sq_ctx->substream, sq_ctx->sdp_mcast); + seq_printf(m, "W0: cq \t\t\t\t%d\nW0: sqe_way_mask \t\t%d\n\n", + sq_ctx->cq, sq_ctx->sqe_way_mask); + + seq_printf(m, "W1: smq \t\t\t%d\nW1: cq_ena \t\t\t%d\nW1: xoff\t\t\t%d\n", + sq_ctx->smq, sq_ctx->cq_ena, sq_ctx->xoff); + seq_printf(m, "W1: sso_ena \t\t\t%d\nW1: smq_rr_weight\t\t%d\n", + sq_ctx->sso_ena, sq_ctx->smq_rr_weight); + seq_printf(m, "W1: default_chan\t\t%d\nW1: sqb_count\t\t\t%d\n\n", + sq_ctx->default_chan, sq_ctx->sqb_count); + + seq_printf(m, "W2: smq_rr_count_lb \t\t%d\n", sq_ctx->smq_rr_count_lb); + seq_printf(m, "W2: smq_rr_count_ub \t\t%d\n", sq_ctx->smq_rr_count_ub); + seq_printf(m, "W2: sqb_aura \t\t\t%d\nW2: sq_int \t\t\t%d\n", + sq_ctx->sqb_aura, sq_ctx->sq_int); + seq_printf(m, "W2: sq_int_ena \t\t\t%d\nW2: sqe_stype \t\t\t%d\n", + sq_ctx->sq_int_ena, sq_ctx->sqe_stype); + + seq_printf(m, "W3: max_sqe_size\t\t%d\nW3: cq_limit\t\t\t%d\n", + sq_ctx->max_sqe_size, sq_ctx->cq_limit); + seq_printf(m, "W3: lmt_dis \t\t\t%d\nW3: 
mnq_dis \t\t\t%d\n", + sq_ctx->mnq_dis, sq_ctx->lmt_dis); + seq_printf(m, "W3: smq_next_sq\t\t\t%d\nW3: smq_lso_segnum\t\t%d\n", + sq_ctx->smq_next_sq, sq_ctx->smq_lso_segnum); + seq_printf(m, "W3: tail_offset \t\t%d\nW3: smenq_offset\t\t%d\n", + sq_ctx->tail_offset, sq_ctx->smenq_offset); + seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", + sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); + + seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); + seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb); + seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); + seq_printf(m, "W7: smenq_next_sqb \t\t%llx\n\n", + sq_ctx->smenq_next_sqb); + + seq_printf(m, "W8: head_sqb\t\t\t%llx\n\n", sq_ctx->head_sqb); + + seq_printf(m, "W9: vfi_lso_total\t\t%d\n", sq_ctx->vfi_lso_total); + seq_printf(m, "W9: vfi_lso_sizem1\t\t%d\nW9: vfi_lso_sb\t\t\t%d\n", + sq_ctx->vfi_lso_sizem1, sq_ctx->vfi_lso_sb); + seq_printf(m, "W9: vfi_lso_mps\t\t\t%d\nW9: vfi_lso_vlan0_ins_ena\t%d\n", + sq_ctx->vfi_lso_mps, sq_ctx->vfi_lso_vlan0_ins_ena); + seq_printf(m, "W9: vfi_lso_vlan1_ins_ena\t%d\nW9: vfi_lso_vld \t\t%d\n\n", + sq_ctx->vfi_lso_vld, sq_ctx->vfi_lso_vlan1_ins_ena); + + seq_printf(m, "W10: scm_lso_rem \t\t%llu\n\n", + (u64)sq_ctx->scm_lso_rem); + seq_printf(m, "W11: octs \t\t\t%llu\n\n", (u64)sq_ctx->octs); + seq_printf(m, "W12: pkts \t\t\t%llu\n\n", (u64)sq_ctx->pkts); + seq_printf(m, "W14: dropped_octs \t\t%llu\n\n", + (u64)sq_ctx->dropped_octs); + seq_printf(m, "W15: dropped_pkts \t\t%llu\n\n", + (u64)sq_ctx->dropped_pkts); +} + /* Dumps given nix_sq's context */ static void print_nix_sq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) { struct nix_sq_ctx_s *sq_ctx = &rsp->sq; + struct nix_hw *nix_hw = m->private; + struct rvu *rvu = nix_hw->rvu; + if (!is_rvu_otx2(rvu)) { + print_nix_cn10k_sq_ctx(m, (struct nix_cn10k_sq_ctx_s *)sq_ctx); + return; + } seq_printf(m, "W0: sqe_way_mask \t\t%d\nW0: cq \t\t\t\t%d\n", 
sq_ctx->sqe_way_mask, sq_ctx->cq); seq_printf(m, "W0: sdp_mcast \t\t\t%d\nW0: substream \t\t\t0x%03x\n", @@ -974,10 +1140,94 @@ static void print_nix_sq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) (u64)sq_ctx->dropped_pkts); } +static void print_nix_cn10k_rq_ctx(struct seq_file *m, + struct nix_cn10k_rq_ctx_s *rq_ctx) +{ + seq_printf(m, "W0: ena \t\t\t%d\nW0: sso_ena \t\t\t%d\n", + rq_ctx->ena, rq_ctx->sso_ena); + seq_printf(m, "W0: ipsech_ena \t\t\t%d\nW0: ena_wqwd \t\t\t%d\n", + rq_ctx->ipsech_ena, rq_ctx->ena_wqwd); + seq_printf(m, "W0: cq \t\t\t\t%d\nW0: lenerr_dis \t\t\t%d\n", + rq_ctx->cq, rq_ctx->lenerr_dis); + seq_printf(m, "W0: csum_il4_dis \t\t%d\nW0: csum_ol4_dis \t\t%d\n", + rq_ctx->csum_il4_dis, rq_ctx->csum_ol4_dis); + seq_printf(m, "W0: len_il4_dis \t\t%d\nW0: len_il3_dis \t\t%d\n", + rq_ctx->len_il4_dis, rq_ctx->len_il3_dis); + seq_printf(m, "W0: len_ol4_dis \t\t%d\nW0: len_ol3_dis \t\t%d\n", + rq_ctx->len_ol4_dis, rq_ctx->len_ol3_dis); + seq_printf(m, "W0: wqe_aura \t\t\t%d\n\n", rq_ctx->wqe_aura); + + seq_printf(m, "W1: spb_aura \t\t\t%d\nW1: lpb_aura \t\t\t%d\n", + rq_ctx->spb_aura, rq_ctx->lpb_aura); + seq_printf(m, "W1: spb_aura \t\t\t%d\n", rq_ctx->spb_aura); + seq_printf(m, "W1: sso_grp \t\t\t%d\nW1: sso_tt \t\t\t%d\n", + rq_ctx->sso_grp, rq_ctx->sso_tt); + seq_printf(m, "W1: pb_caching \t\t\t%d\nW1: wqe_caching \t\t%d\n", + rq_ctx->pb_caching, rq_ctx->wqe_caching); + seq_printf(m, "W1: xqe_drop_ena \t\t%d\nW1: spb_drop_ena \t\t%d\n", + rq_ctx->xqe_drop_ena, rq_ctx->spb_drop_ena); + seq_printf(m, "W1: lpb_drop_ena \t\t%d\nW1: pb_stashing \t\t%d\n", + rq_ctx->lpb_drop_ena, rq_ctx->pb_stashing); + seq_printf(m, "W1: ipsecd_drop_ena \t\t%d\nW1: chi_ena \t\t\t%d\n\n", + rq_ctx->ipsecd_drop_ena, rq_ctx->chi_ena); + + seq_printf(m, "W2: band_prof_id \t\t%d\n", rq_ctx->band_prof_id); + seq_printf(m, "W2: policer_ena \t\t%d\n", rq_ctx->policer_ena); + seq_printf(m, "W2: spb_sizem1 \t\t\t%d\n", rq_ctx->spb_sizem1); + seq_printf(m, "W2: 
wqe_skip \t\t\t%d\nW2: sqb_ena \t\t\t%d\n", + rq_ctx->wqe_skip, rq_ctx->spb_ena); + seq_printf(m, "W2: lpb_size1 \t\t\t%d\nW2: first_skip \t\t\t%d\n", + rq_ctx->lpb_sizem1, rq_ctx->first_skip); + seq_printf(m, "W2: later_skip\t\t\t%d\nW2: xqe_imm_size\t\t%d\n", + rq_ctx->later_skip, rq_ctx->xqe_imm_size); + seq_printf(m, "W2: xqe_imm_copy \t\t%d\nW2: xqe_hdr_split \t\t%d\n\n", + rq_ctx->xqe_imm_copy, rq_ctx->xqe_hdr_split); + + seq_printf(m, "W3: xqe_drop \t\t\t%d\nW3: xqe_pass \t\t\t%d\n", + rq_ctx->xqe_drop, rq_ctx->xqe_pass); + seq_printf(m, "W3: wqe_pool_drop \t\t%d\nW3: wqe_pool_pass \t\t%d\n", + rq_ctx->wqe_pool_drop, rq_ctx->wqe_pool_pass); + seq_printf(m, "W3: spb_pool_drop \t\t%d\nW3: spb_pool_pass \t\t%d\n", + rq_ctx->spb_pool_drop, rq_ctx->spb_pool_pass); + seq_printf(m, "W3: spb_aura_drop \t\t%d\nW3: spb_aura_pass \t\t%d\n\n", + rq_ctx->spb_aura_pass, rq_ctx->spb_aura_drop); + + seq_printf(m, "W4: lpb_aura_drop \t\t%d\nW3: lpb_aura_pass \t\t%d\n", + rq_ctx->lpb_aura_pass, rq_ctx->lpb_aura_drop); + seq_printf(m, "W4: lpb_pool_drop \t\t%d\nW3: lpb_pool_pass \t\t%d\n", + rq_ctx->lpb_pool_drop, rq_ctx->lpb_pool_pass); + seq_printf(m, "W4: rq_int \t\t\t%d\nW4: rq_int_ena\t\t\t%d\n", + rq_ctx->rq_int, rq_ctx->rq_int_ena); + seq_printf(m, "W4: qint_idx \t\t\t%d\n\n", rq_ctx->qint_idx); + + seq_printf(m, "W5: ltag \t\t\t%d\nW5: good_utag \t\t\t%d\n", + rq_ctx->ltag, rq_ctx->good_utag); + seq_printf(m, "W5: bad_utag \t\t\t%d\nW5: flow_tagw \t\t\t%d\n", + rq_ctx->bad_utag, rq_ctx->flow_tagw); + seq_printf(m, "W5: ipsec_vwqe \t\t\t%d\nW5: vwqe_ena \t\t\t%d\n", + rq_ctx->ipsec_vwqe, rq_ctx->vwqe_ena); + seq_printf(m, "W5: vwqe_wait \t\t\t%d\nW5: max_vsize_exp\t\t%d\n", + rq_ctx->vwqe_wait, rq_ctx->max_vsize_exp); + seq_printf(m, "W5: vwqe_skip \t\t\t%d\n\n", rq_ctx->vwqe_skip); + + seq_printf(m, "W6: octs \t\t\t%llu\n\n", (u64)rq_ctx->octs); + seq_printf(m, "W7: pkts \t\t\t%llu\n\n", (u64)rq_ctx->pkts); + seq_printf(m, "W8: drop_octs \t\t\t%llu\n\n", 
(u64)rq_ctx->drop_octs); + seq_printf(m, "W9: drop_pkts \t\t\t%llu\n\n", (u64)rq_ctx->drop_pkts); + seq_printf(m, "W10: re_pkts \t\t\t%llu\n", (u64)rq_ctx->re_pkts); +} + /* Dumps given nix_rq's context */ static void print_nix_rq_ctx(struct seq_file *m, struct nix_aq_enq_rsp *rsp) { struct nix_rq_ctx_s *rq_ctx = &rsp->rq; + struct nix_hw *nix_hw = m->private; + struct rvu *rvu = nix_hw->rvu; + + if (!is_rvu_otx2(rvu)) { + print_nix_cn10k_rq_ctx(m, (struct nix_cn10k_rq_ctx_s *)rq_ctx); + return; + } seq_printf(m, "W0: wqe_aura \t\t\t%d\nW0: substream \t\t\t0x%03x\n", rq_ctx->wqe_aura, rq_ctx->substream); @@ -1439,6 +1689,7 @@ static void rvu_dbg_npa_init(struct rvu *rvu) static int cgx_print_stats(struct seq_file *s, int lmac_id) { struct cgx_link_user_info linfo; + struct mac_ops *mac_ops; void *cgxd = s->private; u64 ucast, mcast, bcast; int stat = 0, err = 0; @@ -1450,6 +1701,11 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) if (!rvu) return -ENODEV; + mac_ops = get_mac_ops(cgxd); + + if (!mac_ops) + return 0; + /* Link status */ seq_puts(s, "\n=======Link Status======\n\n"); err = cgx_get_link_info(cgxd, lmac_id, &linfo); @@ -1459,7 +1715,8 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) linfo.link_up ? 
"UP" : "DOWN", linfo.speed); /* Rx stats */ - seq_puts(s, "\n=======NIX RX_STATS(CGX port level)======\n\n"); + seq_printf(s, "\n=======NIX RX_STATS(%s port level)======\n\n", + mac_ops->name); ucast = PRINT_CGX_CUML_NIXRX_STATUS(RX_UCAST, "rx_ucast_frames"); if (err) return err; @@ -1481,7 +1738,8 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) return err; /* Tx stats */ - seq_puts(s, "\n=======NIX TX_STATS(CGX port level)======\n\n"); + seq_printf(s, "\n=======NIX TX_STATS(%s port level)======\n\n", + mac_ops->name); ucast = PRINT_CGX_CUML_NIXTX_STATUS(TX_UCAST, "tx_ucast_frames"); if (err) return err; @@ -1500,24 +1758,35 @@ static int cgx_print_stats(struct seq_file *s, int lmac_id) return err; /* Rx stats */ - seq_puts(s, "\n=======CGX RX_STATS======\n\n"); - while (stat < CGX_RX_STATS_COUNT) { - err = cgx_get_rx_stats(cgxd, lmac_id, stat, &rx_stat); + seq_printf(s, "\n=======%s RX_STATS======\n\n", mac_ops->name); + while (stat < mac_ops->rx_stats_cnt) { + err = mac_ops->mac_get_rx_stats(cgxd, lmac_id, stat, &rx_stat); if (err) return err; - seq_printf(s, "%s: %llu\n", cgx_rx_stats_fields[stat], rx_stat); + if (is_rvu_otx2(rvu)) + seq_printf(s, "%s: %llu\n", cgx_rx_stats_fields[stat], + rx_stat); + else + seq_printf(s, "%s: %llu\n", rpm_rx_stats_fields[stat], + rx_stat); stat++; } /* Tx stats */ stat = 0; - seq_puts(s, "\n=======CGX TX_STATS======\n\n"); - while (stat < CGX_TX_STATS_COUNT) { - err = cgx_get_tx_stats(cgxd, lmac_id, stat, &tx_stat); + seq_printf(s, "\n=======%s TX_STATS======\n\n", mac_ops->name); + while (stat < mac_ops->tx_stats_cnt) { + err = mac_ops->mac_get_tx_stats(cgxd, lmac_id, stat, &tx_stat); if (err) return err; - seq_printf(s, "%s: %llu\n", cgx_tx_stats_fields[stat], tx_stat); - stat++; + + if (is_rvu_otx2(rvu)) + seq_printf(s, "%s: %llu\n", cgx_tx_stats_fields[stat], + tx_stat); + else + seq_printf(s, "%s: %llu\n", rpm_tx_stats_fields[stat], + tx_stat); + stat++; } return err; @@ -1547,21 +1816,34 @@ 
RVU_DEBUG_SEQ_FOPS(cgx_stat, cgx_stat_display, NULL); static void rvu_dbg_cgx_init(struct rvu *rvu) { + struct mac_ops *mac_ops; + unsigned long lmac_bmap; + int rvu_def_cgx_id = 0; int i, lmac_id; char dname[20]; void *cgx; - rvu->rvu_dbg.cgx_root = debugfs_create_dir("cgx", rvu->rvu_dbg.root); + if (!cgx_get_cgxcnt_max()) + return; + + mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + if (!mac_ops) + return; + + rvu->rvu_dbg.cgx_root = debugfs_create_dir(mac_ops->name, + rvu->rvu_dbg.root); for (i = 0; i < cgx_get_cgxcnt_max(); i++) { cgx = rvu_cgx_pdata(i, rvu); if (!cgx) continue; + lmac_bmap = cgx_get_lmac_bmap(cgx); /* cgx debugfs dir */ - sprintf(dname, "cgx%d", i); + sprintf(dname, "%s%d", mac_ops->name, i); rvu->rvu_dbg.cgx = debugfs_create_dir(dname, rvu->rvu_dbg.cgx_root); - for (lmac_id = 0; lmac_id < cgx_get_lmac_cnt(cgx); lmac_id++) { + + for_each_set_bit(lmac_id, &lmac_bmap, MAX_LMAC_PER_CGX) { /* lmac debugfs dir */ sprintf(dname, "lmac%d", lmac_id); rvu->rvu_dbg.lmac = @@ -2128,15 +2410,32 @@ static void rvu_dbg_cpt_init(struct rvu *rvu, int blkaddr) &rvu_dbg_cpt_err_info_fops); } +static const char *rvu_get_dbg_dir_name(struct rvu *rvu) +{ + if (!is_rvu_otx2(rvu)) + return "cn10k"; + else + return "octeontx2"; +} + void rvu_dbg_init(struct rvu *rvu) { - rvu->rvu_dbg.root = debugfs_create_dir(DEBUGFS_DIR_NAME, NULL); + rvu->rvu_dbg.root = debugfs_create_dir(rvu_get_dbg_dir_name(rvu), NULL); debugfs_create_file("rsrc_alloc", 0444, rvu->rvu_dbg.root, rvu, &rvu_dbg_rsrc_status_fops); - debugfs_create_file("rvu_pf_cgx_map", 0444, rvu->rvu_dbg.root, rvu, - &rvu_dbg_rvu_pf_cgx_map_fops); + if (!cgx_get_cgxcnt_max()) + goto create; + + if (is_rvu_otx2(rvu)) + debugfs_create_file("rvu_pf_cgx_map", 0444, rvu->rvu_dbg.root, + rvu, &rvu_dbg_rvu_pf_cgx_map_fops); + else + debugfs_create_file("rvu_pf_cgx_map", 0444, rvu->rvu_dbg.root, + rvu, &rvu_dbg_rvu_pf_cgx_map_fops); + +create: rvu_dbg_npa_init(rvu); rvu_dbg_nix_init(rvu, BLKADDR_NIX0); diff 
--git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index b54753ef7d94..d3000194e2d3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -16,6 +16,7 @@ #include "rvu.h" #include "npc.h" #include "cgx.h" +#include "lmac_common.h" static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, @@ -214,6 +215,7 @@ static bool is_valid_txschq(struct rvu *rvu, int blkaddr, static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct mac_ops *mac_ops; int pkind, pf, vf, lbkid; u8 cgx_id, lmac_id; int err; @@ -233,17 +235,19 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) "PF_Func 0x%x: Invalid pkind\n", pcifunc); return -EINVAL; } - pfvf->rx_chan_base = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0); + pfvf->rx_chan_base = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0); pfvf->tx_chan_base = pfvf->rx_chan_base; pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind); rvu_npc_set_pkind(rvu, pkind, pfvf); + mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu)); /* By default we enable pause frames */ if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0) - cgx_lmac_set_pause_frm(rvu_cgx_pdata(cgx_id, rvu), - lmac_id, true, true); + mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id, + rvu), + lmac_id, true, true); break; case NIX_INTF_TYPE_LBK: vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1; @@ -262,10 +266,10 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) * loopback channels.Therefore if odd number of AF VFs are * enabled then the last VF remains with no pair. 
*/ - pfvf->rx_chan_base = NIX_CHAN_LBK_CHX(lbkid, vf); + pfvf->rx_chan_base = rvu_nix_chan_lbk(rvu, lbkid, vf); pfvf->tx_chan_base = vf & 0x1 ? - NIX_CHAN_LBK_CHX(lbkid, vf - 1) : - NIX_CHAN_LBK_CHX(lbkid, vf + 1); + rvu_nix_chan_lbk(rvu, lbkid, vf - 1) : + rvu_nix_chan_lbk(rvu, lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, @@ -1000,6 +1004,14 @@ int rvu_mbox_handler_nix_aq_enq(struct rvu *rvu, return rvu_nix_aq_enq_inst(rvu, req, rsp); } #endif +/* CN10K mbox handler */ +int rvu_mbox_handler_nix_cn10k_aq_enq(struct rvu *rvu, + struct nix_cn10k_aq_enq_req *req, + struct nix_cn10k_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, (struct nix_aq_enq_req *)req, + (struct nix_aq_enq_rsp *)rsp); +} int rvu_mbox_handler_nix_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req, @@ -2535,6 +2547,43 @@ static int nix_af_mark_format_setup(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } +static void rvu_get_lbk_link_max_frs(struct rvu *rvu, u16 *max_mtu) +{ + /* CN10K supports LBK FIFO size 72 KB */ + if (rvu->hw->lbk_bufsize == 0x12000) + *max_mtu = CN10K_LBK_LINK_MAX_FRS; + else + *max_mtu = NIC_HW_MAX_FRS; +} + +static void rvu_get_lmac_link_max_frs(struct rvu *rvu, u16 *max_mtu) +{ + /* RPM supports FIFO len 128 KB */ + if (rvu_cgx_get_fifolen(rvu) == 0x20000) + *max_mtu = CN10K_LMAC_LINK_MAX_FRS; + else + *max_mtu = NIC_HW_MAX_FRS; +} + +int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, + struct nix_hw_info *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + if (is_afvf(pcifunc)) + rvu_get_lbk_link_max_frs(rvu, &rsp->max_mtu); + else + rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu); + + rsp->min_mtu = NIC_HW_MIN_FRS; + return 0; +} + int rvu_mbox_handler_nix_stats_rst(struct rvu *rvu, struct msg_req *req, struct msg_rsp *rsp) { @@ -3099,6 +3148,7 
@@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, u64 cfg, lmac_fifo_len; struct nix_hw *nix_hw; u8 cgx = 0, lmac = 0; + u16 max_mtu; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); if (blkaddr < 0) @@ -3108,7 +3158,12 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, if (!nix_hw) return -EINVAL; - if (!req->sdp_link && req->maxlen > NIC_HW_MAX_FRS) + if (is_afvf(pcifunc)) + rvu_get_lbk_link_max_frs(rvu, &max_mtu); + else + rvu_get_lmac_link_max_frs(rvu, &max_mtu); + + if (!req->sdp_link && req->maxlen > max_mtu) return NIX_AF_ERR_FRS_INVALID; if (req->update_minlen && req->minlen < NIC_HW_MIN_FRS) @@ -3168,7 +3223,8 @@ linkcfg: /* Update transmit credits for CGX links */ lmac_fifo_len = - CGX_FIFO_LEN / cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu)); + rvu_cgx_get_fifolen(rvu) / + cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu)); cfg = rvu_read64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link)); cfg &= ~(0xFFFFFULL << 12); cfg |= ((lmac_fifo_len - req->maxlen) / 16) << 12; @@ -3208,23 +3264,40 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, return 0; } +static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs) +{ + /* CN10k supports 72KB FIFO size and max packet size of 64k */ + if (rvu->hw->lbk_bufsize == 0x12000) + return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16; + + return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */ +} + static void nix_link_config(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; int cgx, lmac_cnt, slink, link; + u16 lbk_max_frs, lmac_max_frs; u64 tx_credits; + rvu_get_lbk_link_max_frs(rvu, &lbk_max_frs); + rvu_get_lmac_link_max_frs(rvu, &lmac_max_frs); + /* Set default min/max packet lengths allowed on NIX Rx links. * * With HW reset minlen value of 60byte, HW will treat ARP pkts * as undersize and report them to SW as error pkts, hence * setting it to 40 bytes. 
*/ - for (link = 0; link < (hw->cgx_links + hw->lbk_links); link++) { + for (link = 0; link < hw->cgx_links; link++) { rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), - NIC_HW_MAX_FRS << 16 | NIC_HW_MIN_FRS); + ((u64)lmac_max_frs << 16) | NIC_HW_MIN_FRS); } + for (link = hw->cgx_links; link < hw->lbk_links; link++) { + rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), + ((u64)lbk_max_frs << 16) | NIC_HW_MIN_FRS); + } if (hw->sdp_links) { link = hw->cgx_links + hw->lbk_links; rvu_write64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link), @@ -3236,7 +3309,8 @@ static void nix_link_config(struct rvu *rvu, int blkaddr) */ for (cgx = 0; cgx < hw->cgx; cgx++) { lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu)); - tx_credits = ((CGX_FIFO_LEN / lmac_cnt) - NIC_HW_MAX_FRS) / 16; + tx_credits = ((rvu_cgx_get_fifolen(rvu) / lmac_cnt) - + lmac_max_frs) / 16; /* Enable credits and set credit pkt count to max allowed */ tx_credits = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); slink = cgx * hw->lmac_per_cgx; @@ -3250,7 +3324,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr) /* Set Tx credits for LBK link */ slink = hw->cgx_links; for (link = slink; link < (slink + hw->lbk_links); link++) { - tx_credits = 1000; /* 10 * max LBK datarate = 10 * 100Gbps */ + tx_credits = rvu_get_lbk_link_credits(rvu, lbk_max_frs); /* Enable credits and set credit pkt count to max allowed */ tx_credits = (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1); rvu_write64(rvu, blkaddr, @@ -3381,14 +3455,6 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) if (err) return err; - /* Set num of links of each type */ - cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); - hw->cgx = (cfg >> 12) & 0xF; - hw->lmac_per_cgx = (cfg >> 8) & 0xF; - hw->cgx_links = hw->cgx * hw->lmac_per_cgx; - hw->lbk_links = (cfg >> 24) & 0xF; - hw->sdp_links = 1; - /* Initialize admin queue */ err = nix_aq_init(rvu, block); if (err) @@ -3623,10 +3689,14 @@ static int rvu_nix_lf_ptp_tx_cfg(struct rvu 
*rvu, u16 pcifunc, bool enable) { struct rvu_hwinfo *hw = rvu->hw; struct rvu_block *block; - int blkaddr; + int blkaddr, pf; int nixlf; u64 cfg; + pf = rvu_get_pf(pcifunc); + if (!is_mac_feature_supported(rvu, pf, RVU_LMAC_FEAT_PTP)) + return 0; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); if (blkaddr < 0) return NIX_AF_ERR_AF_LF_INVALID; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 5cf9b7a907ae..04bb0803a5c5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -102,9 +102,9 @@ int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel) return -EINVAL; } else { rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - base = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0x0); + base = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0x0); /* CGX mapped functions has maximum of 16 channels */ - end = NIX_CHAN_CGX_LMAC_CHX(cgx_id, lmac_id, 0xF); + end = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0xF); } if (channel < base || channel > end) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 79a6dcf0e3c0..3e401fd8ac63 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -44,6 +44,11 @@ #define RVU_AF_PFME_INT_W1S (0x28c8) #define RVU_AF_PFME_INT_ENA_W1S (0x28d0) #define RVU_AF_PFME_INT_ENA_W1C (0x28d8) +#define RVU_AF_PFX_BAR4_ADDR(a) (0x5000 | (a) << 4) +#define RVU_AF_PFX_BAR4_CFG (0x5200 | (a) << 4) +#define RVU_AF_PFX_VF_BAR4_ADDR (0x5400 | (a) << 4) +#define RVU_AF_PFX_VF_BAR4_CFG (0x5600 | (a) << 4) +#define RVU_AF_PFX_LMTLINE_ADDR (0x5800 | (a) << 4) /* Admin function's privileged PF/VF registers */ #define RVU_PRIV_CONST (0x8000000) @@ -100,6 +105,8 @@ #define RVU_PF_MSIX_VECX_ADDR(a) (0x000 | (a) << 4) #define RVU_PF_MSIX_VECX_CTL(a) (0x008 | (a) 
<< 4) #define RVU_PF_MSIX_PBAX(a) (0xF0000 | (a) << 3) +#define RVU_PF_VF_MBOX_ADDR (0xC40) +#define RVU_PF_LMTLINE_ADDR (0xC48) /* RVU VF registers */ #define RVU_VF_VFPF_MBOX0 (0x00000) @@ -399,12 +406,16 @@ #define NIX_AF_RX_NPC_MIRROR_RCV (0x4720) #define NIX_AF_RX_NPC_MIRROR_DROP (0x4730) #define NIX_AF_RX_ACTIVE_CYCLES_PCX(a) (0x4800 | (a) << 16) +#define NIX_AF_LINKX_CFG(a) (0x4010 | (a) << 17) #define NIX_PRIV_AF_INT_CFG (0x8000000) #define NIX_PRIV_LFX_CFG (0x8000010) #define NIX_PRIV_LFX_INT_CFG (0x8000020) #define NIX_AF_RVU_LF_CFG_DEBUG (0x8000030) +#define NIX_AF_LINKX_BASE_MASK GENMASK_ULL(11, 0) +#define NIX_AF_LINKX_RANGE_MASK GENMASK_ULL(19, 16) + /* SSO */ #define SSO_AF_CONST (0x1000) #define SSO_AF_CONST1 (0x1008) @@ -637,4 +648,17 @@ (0x00F00 | (a) << 5 | (b) << 4) #define NDC_AF_BANKX_HIT_PC(a) (0x01000 | (a) << 3) #define NDC_AF_BANKX_MISS_PC(a) (0x01100 | (a) << 3) + +/* LBK */ +#define LBK_CONST (0x10ull) +#define LBK_LINK_CFG_P2X (0x400ull) +#define LBK_LINK_CFG_X2P (0x408ull) +#define LBK_CONST_CHANS GENMASK_ULL(47, 32) +#define LBK_CONST_DST GENMASK_ULL(31, 28) +#define LBK_CONST_SRC GENMASK_ULL(27, 24) +#define LBK_CONST_BUF_SIZE GENMASK_ULL(23, 0) +#define LBK_LINK_CFG_RANGE_MASK GENMASK_ULL(19, 16) +#define LBK_LINK_CFG_ID_MASK GENMASK_ULL(11, 6) +#define LBK_LINK_CFG_BASE_MASK GENMASK_ULL(5, 0) + #endif /* RVU_REG_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 5e15f4fc11e3..5e5f45c7eab0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -139,63 +139,29 @@ enum npa_inpq { /* NPA admin queue instruction structure */ struct npa_aq_inst_s { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 doneint : 1; /* W0 */ - u64 reserved_44_62 : 19; - u64 cindex : 20; - u64 reserved_17_23 : 7; - u64 lf : 9; - u64 ctype : 4; - u64 op : 4; -#else - u64 op : 4; + u64 op : 4; /* W0 */ u64 ctype 
: 4; u64 lf : 9; u64 reserved_17_23 : 7; u64 cindex : 20; u64 reserved_44_62 : 19; u64 doneint : 1; -#endif u64 res_addr; /* W1 */ }; /* NPA admin queue result structure */ struct npa_aq_res_s { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 reserved_17_63 : 47; /* W0 */ - u64 doneint : 1; - u64 compcode : 8; - u64 ctype : 4; - u64 op : 4; -#else - u64 op : 4; + u64 op : 4; /* W0 */ u64 ctype : 4; u64 compcode : 8; u64 doneint : 1; u64 reserved_17_63 : 47; -#endif u64 reserved_64_127; /* W1 */ }; struct npa_aura_s { u64 pool_addr; /* W0 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ - u64 avg_level : 8; - u64 reserved_118_119 : 2; - u64 shift : 6; - u64 aura_drop : 8; - u64 reserved_98_103 : 6; - u64 bp_ena : 2; - u64 aura_drop_ena : 1; - u64 pool_drop_ena : 1; - u64 reserved_93 : 1; - u64 avg_con : 9; - u64 pool_way_mask : 16; - u64 pool_caching : 1; - u64 reserved_65 : 2; - u64 ena : 1; -#else - u64 ena : 1; + u64 ena : 1; /* W1 */ u64 reserved_65 : 2; u64 pool_caching : 1; u64 pool_way_mask : 16; @@ -209,59 +175,24 @@ struct npa_aura_s { u64 shift : 6; u64 reserved_118_119 : 2; u64 avg_level : 8; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ - u64 reserved_189_191 : 3; - u64 nix1_bpid : 9; - u64 reserved_177_179 : 3; - u64 nix0_bpid : 9; - u64 reserved_164_167 : 4; - u64 count : 36; -#else - u64 count : 36; + u64 count : 36; /* W2 */ u64 reserved_164_167 : 4; u64 nix0_bpid : 9; u64 reserved_177_179 : 3; u64 nix1_bpid : 9; u64 reserved_189_191 : 3; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ - u64 reserved_252_255 : 4; - u64 fc_hyst_bits : 4; - u64 fc_stype : 2; - u64 fc_up_crossing : 1; - u64 fc_ena : 1; - u64 reserved_240_243 : 4; - u64 bp : 8; - u64 reserved_228_231 : 4; - u64 limit : 36; -#else - u64 limit : 36; + u64 limit : 36; /* W3 */ u64 reserved_228_231 : 4; u64 bp : 8; - u64 reserved_240_243 : 4; + u64 reserved_241_243 : 3; + u64 fc_be : 1; u64 fc_ena : 1; u64 fc_up_crossing : 1; u64 fc_stype : 2; u64 fc_hyst_bits : 4; u64 reserved_252_255 
: 4; -#endif u64 fc_addr; /* W4 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ - u64 reserved_379_383 : 5; - u64 err_qint_idx : 7; - u64 reserved_371 : 1; - u64 thresh_qint_idx : 7; - u64 reserved_363 : 1; - u64 thresh_up : 1; - u64 thresh_int_ena : 1; - u64 thresh_int : 1; - u64 err_int_ena : 8; - u64 err_int : 8; - u64 update_time : 16; - u64 pool_drop : 8; -#else - u64 pool_drop : 8; + u64 pool_drop : 8; /* W5 */ u64 update_time : 16; u64 err_int : 8; u64 err_int_ena : 8; @@ -273,31 +204,15 @@ struct npa_aura_s { u64 reserved_371 : 1; u64 err_qint_idx : 7; u64 reserved_379_383 : 5; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ - u64 reserved_420_447 : 28; - u64 thresh : 36; -#else - u64 thresh : 36; - u64 reserved_420_447 : 28; -#endif + u64 thresh : 36; /* W6*/ + u64 rsvd_423_420 : 4; + u64 fc_msh_dst : 11; + u64 reserved_435_447 : 13; u64 reserved_448_511; /* W7 */ }; struct npa_pool_s { u64 stack_base; /* W0 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ - u64 reserved_115_127 : 13; - u64 buf_size : 11; - u64 reserved_100_103 : 4; - u64 buf_offset : 12; - u64 stack_way_mask : 16; - u64 reserved_70_71 : 3; - u64 stack_caching : 1; - u64 reserved_66_67 : 2; - u64 nat_align : 1; - u64 ena : 1; -#else u64 ena : 1; u64 nat_align : 1; u64 reserved_66_67 : 2; @@ -308,36 +223,10 @@ struct npa_pool_s { u64 reserved_100_103 : 4; u64 buf_size : 11; u64 reserved_115_127 : 13; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ - u64 stack_pages : 32; - u64 stack_max_pages : 32; -#else u64 stack_max_pages : 32; u64 stack_pages : 32; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ - u64 reserved_240_255 : 16; - u64 op_pc : 48; -#else u64 op_pc : 48; u64 reserved_240_255 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ - u64 reserved_316_319 : 4; - u64 update_time : 16; - u64 reserved_297_299 : 3; - u64 fc_up_crossing : 1; - u64 fc_hyst_bits : 4; - u64 fc_stype : 2; - u64 fc_ena : 1; - u64 avg_con : 9; - u64 avg_level : 8; - u64 reserved_270_271 : 
2; - u64 shift : 6; - u64 reserved_260_263 : 4; - u64 stack_offset : 4; -#else u64 stack_offset : 4; u64 reserved_260_263 : 4; u64 shift : 6; @@ -348,26 +237,13 @@ struct npa_pool_s { u64 fc_stype : 2; u64 fc_hyst_bits : 4; u64 fc_up_crossing : 1; - u64 reserved_297_299 : 3; + u64 fc_be : 1; + u64 reserved_298_299 : 2; u64 update_time : 16; u64 reserved_316_319 : 4; -#endif u64 fc_addr; /* W5 */ u64 ptr_start; /* W6 */ u64 ptr_end; /* W7 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ - u64 reserved_571_575 : 5; - u64 err_qint_idx : 7; - u64 reserved_563 : 1; - u64 thresh_qint_idx : 7; - u64 reserved_555 : 1; - u64 thresh_up : 1; - u64 thresh_int_ena : 1; - u64 thresh_int : 1; - u64 err_int_ena : 8; - u64 err_int : 8; - u64 reserved_512_535 : 24; -#else u64 reserved_512_535 : 24; u64 err_int : 8; u64 err_int_ena : 8; @@ -379,14 +255,10 @@ struct npa_pool_s { u64 reserved_563 : 1; u64 err_qint_idx : 7; u64 reserved_571_575 : 5; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ - u64 reserved_612_639 : 28; u64 thresh : 36; -#else - u64 thresh : 36; - u64 reserved_612_639 : 28; -#endif + u64 rsvd_615_612 : 4; + u64 fc_msh_dst : 11; + u64 reserved_627_639 : 13; u64 reserved_640_703; /* W10 */ u64 reserved_704_767; /* W11 */ u64 reserved_768_831; /* W12 */ @@ -414,6 +286,7 @@ enum nix_aq_ctype { NIX_AQ_CTYPE_MCE = 0x3, NIX_AQ_CTYPE_RSS = 0x4, NIX_AQ_CTYPE_DYNO = 0x5, + NIX_AQ_CTYPE_BAND_PROF = 0x6, }; /* NIX admin queue instruction opcodes */ @@ -428,59 +301,29 @@ enum nix_aq_instop { /* NIX admin queue instruction structure */ struct nix_aq_inst_s { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 doneint : 1; /* W0 */ - u64 reserved_44_62 : 19; - u64 cindex : 20; - u64 reserved_15_23 : 9; - u64 lf : 7; - u64 ctype : 4; - u64 op : 4; -#else u64 op : 4; u64 ctype : 4; - u64 lf : 7; - u64 reserved_15_23 : 9; + u64 lf : 9; + u64 reserved_17_23 : 7; u64 cindex : 20; u64 reserved_44_62 : 19; u64 doneint : 1; -#endif u64 res_addr; /* W1 */ }; /* NIX admin queue result structure 
*/ struct nix_aq_res_s { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 reserved_17_63 : 47; /* W0 */ - u64 doneint : 1; - u64 compcode : 8; - u64 ctype : 4; - u64 op : 4; -#else u64 op : 4; u64 ctype : 4; u64 compcode : 8; u64 doneint : 1; u64 reserved_17_63 : 47; -#endif u64 reserved_64_127; /* W1 */ }; /* NIX Completion queue context structure */ struct nix_cq_ctx_s { u64 base; -#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ - u64 wrptr : 20; - u64 avg_con : 9; - u64 cint_idx : 7; - u64 cq_err : 1; - u64 qint_idx : 7; - u64 rsvd_81_83 : 3; - u64 bpid : 9; - u64 rsvd_69_71 : 3; - u64 bp_ena : 1; - u64 rsvd_64_67 : 4; -#else u64 rsvd_64_67 : 4; u64 bp_ena : 1; u64 rsvd_69_71 : 3; @@ -491,31 +334,10 @@ struct nix_cq_ctx_s { u64 cint_idx : 7; u64 avg_con : 9; u64 wrptr : 20; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ - u64 update_time : 16; - u64 avg_level : 8; - u64 head : 20; - u64 tail : 20; -#else u64 tail : 20; u64 head : 20; u64 avg_level : 8; u64 update_time : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ - u64 cq_err_int_ena : 8; - u64 cq_err_int : 8; - u64 qsize : 4; - u64 rsvd_233_235 : 3; - u64 caching : 1; - u64 substream : 20; - u64 rsvd_210_211 : 2; - u64 ena : 1; - u64 drop_ena : 1; - u64 drop : 8; - u64 bp : 8; -#else u64 bp : 8; u64 drop : 8; u64 drop_ena : 1; @@ -527,20 +349,161 @@ struct nix_cq_ctx_s { u64 qsize : 4; u64 cq_err_int : 8; u64 cq_err_int_ena : 8; -#endif +}; + +/* CN10K NIX Receive queue context structure */ +struct nix_cn10k_rq_ctx_s { + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 rsvd_36_24 : 13; + u64 lenerr_dis : 1; + u64 csum_il4_dis : 1; + u64 csum_ol4_dis : 1; + u64 len_il4_dis : 1; + u64 len_il3_dis : 1; + u64 len_ol4_dis : 1; + u64 len_ol3_dis : 1; + u64 wqe_aura : 20; + u64 spb_aura : 20; + u64 lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; 
+ u64 pb_stashing : 1; + u64 ipsecd_drop_ena : 1; + u64 chi_ena : 1; + u64 rsvd_127_125 : 3; + u64 band_prof_id : 10; /* W2 */ + u64 rsvd_138 : 1; + u64 policer_ena : 1; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 rsvd_150_148 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 rsvd_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 rsvd_189_184 : 6; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; + u64 xqe_drop : 8; /* W3 */ + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; + u64 lpb_aura_drop : 8; /* W4 */ + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 rsvd_291_288 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 rsvd_319_315 : 5; + u64 ltag : 24; /* W5 */ + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 ipsec_vwqe : 1; + u64 vwqe_ena : 1; + u64 vwqe_wait : 8; + u64 max_vsize_exp : 4; + u64 vwqe_skip : 2; + u64 rsvd_383_382 : 2; + u64 octs : 48; /* W6 */ + u64 rsvd_447_432 : 16; + u64 pkts : 48; /* W7 */ + u64 rsvd_511_496 : 16; + u64 drop_octs : 48; /* W8 */ + u64 rsvd_575_560 : 16; + u64 drop_pkts : 48; /* W9 */ + u64 rsvd_639_624 : 16; + u64 re_pkts : 48; /* W10 */ + u64 rsvd_703_688 : 16; + u64 rsvd_767_704; /* W11 */ + u64 rsvd_831_768; /* W12 */ + u64 rsvd_895_832; /* W13 */ + u64 rsvd_959_896; /* W14 */ + u64 rsvd_1023_960; /* W15 */ +}; + +/* CN10K NIX Send queue context structure */ +struct nix_cn10k_sq_ctx_s { + u64 ena : 1; + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; + u64 smq : 10; /* W1 */ + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_weight : 14; + u64 default_chan : 12; + u64 sqb_count : 16; + u64 rsvd_120_119 : 2; + u64 smq_rr_count_lb : 7; + u64 smq_rr_count_ub : 25; /* W2 */ + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + 
u64 sqe_stype : 2; + u64 rsvd_191 : 1; + u64 max_sqe_size : 2; /* W3 */ + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 rsvd_255_253 : 3; + u64 next_sqb : 64; /* W4 */ + u64 tail_sqb : 64; /* W5 */ + u64 smenq_sqb : 64; /* W6 */ + u64 smenq_next_sqb : 64; /* W7 */ + u64 head_sqb : 64; /* W8 */ + u64 rsvd_583_576 : 8; /* W9 */ + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 rsvd_639_630 : 10; + u64 scm_lso_rem : 18; /* W10 */ + u64 rsvd_703_658 : 46; + u64 octs : 48; /* W11 */ + u64 rsvd_767_752 : 16; + u64 pkts : 48; /* W12 */ + u64 rsvd_831_816 : 16; + u64 rsvd_895_832 : 64; /* W13 */ + u64 dropped_octs : 48; + u64 rsvd_959_944 : 16; + u64 dropped_pkts : 48; + u64 rsvd_1023_1008 : 16; }; /* NIX Receive queue context structure */ struct nix_rq_ctx_s { -#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ - u64 wqe_aura : 20; - u64 substream : 20; - u64 cq : 20; - u64 ena_wqwd : 1; - u64 ipsech_ena : 1; - u64 sso_ena : 1; - u64 ena : 1; -#else u64 ena : 1; u64 sso_ena : 1; u64 ipsech_ena : 1; @@ -548,19 +511,6 @@ struct nix_rq_ctx_s { u64 cq : 20; u64 substream : 20; u64 wqe_aura : 20; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ - u64 rsvd_127_122 : 6; - u64 lpb_drop_ena : 1; - u64 spb_drop_ena : 1; - u64 xqe_drop_ena : 1; - u64 wqe_caching : 1; - u64 pb_caching : 2; - u64 sso_tt : 2; - u64 sso_grp : 10; - u64 lpb_aura : 20; - u64 spb_aura : 20; -#else u64 spb_aura : 20; u64 lpb_aura : 20; u64 sso_grp : 10; @@ -571,23 +521,7 @@ struct nix_rq_ctx_s { u64 spb_drop_ena : 1; u64 lpb_drop_ena : 1; u64 rsvd_127_122 : 6; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ - u64 xqe_hdr_split : 1; - u64 xqe_imm_copy : 1; 
- u64 rsvd_189_184 : 6; - u64 xqe_imm_size : 6; - u64 later_skip : 6; - u64 rsvd_171 : 1; - u64 first_skip : 7; - u64 lpb_sizem1 : 12; - u64 spb_ena : 1; - u64 rsvd_150_148 : 3; - u64 wqe_skip : 2; - u64 spb_sizem1 : 6; - u64 rsvd_139_128 : 12; -#else - u64 rsvd_139_128 : 12; + u64 rsvd_139_128 : 12; /* W2 */ u64 spb_sizem1 : 6; u64 wqe_skip : 2; u64 rsvd_150_148 : 3; @@ -600,18 +534,7 @@ struct nix_rq_ctx_s { u64 rsvd_189_184 : 6; u64 xqe_imm_copy : 1; u64 xqe_hdr_split : 1; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ - u64 spb_pool_pass : 8; - u64 spb_pool_drop : 8; - u64 spb_aura_pass : 8; - u64 spb_aura_drop : 8; - u64 wqe_pool_pass : 8; - u64 wqe_pool_drop : 8; - u64 xqe_pass : 8; - u64 xqe_drop : 8; -#else - u64 xqe_drop : 8; + u64 xqe_drop : 8; /* W3*/ u64 xqe_pass : 8; u64 wqe_pool_drop : 8; u64 wqe_pool_pass : 8; @@ -619,19 +542,7 @@ struct nix_rq_ctx_s { u64 spb_aura_pass : 8; u64 spb_pool_drop : 8; u64 spb_pool_pass : 8; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ - u64 rsvd_319_315 : 5; - u64 qint_idx : 7; - u64 rq_int_ena : 8; - u64 rq_int : 8; - u64 rsvd_291_288 : 4; - u64 lpb_pool_pass : 8; - u64 lpb_pool_drop : 8; - u64 lpb_aura_pass : 8; - u64 lpb_aura_drop : 8; -#else - u64 lpb_aura_drop : 8; + u64 lpb_aura_drop : 8; /* W4 */ u64 lpb_aura_pass : 8; u64 lpb_pool_drop : 8; u64 lpb_pool_pass : 8; @@ -640,55 +551,21 @@ struct nix_rq_ctx_s { u64 rq_int_ena : 8; u64 qint_idx : 7; u64 rsvd_319_315 : 5; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ - u64 rsvd_383_366 : 18; - u64 flow_tagw : 6; - u64 bad_utag : 8; - u64 good_utag : 8; - u64 ltag : 24; -#else - u64 ltag : 24; + u64 ltag : 24; /* W5 */ u64 good_utag : 8; u64 bad_utag : 8; u64 flow_tagw : 6; u64 rsvd_383_366 : 18; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ - u64 rsvd_447_432 : 16; - u64 octs : 48; -#else - u64 octs : 48; + u64 octs : 48; /* W6 */ u64 rsvd_447_432 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W7 */ - u64 rsvd_511_496 : 16; - u64 pkts : 
48; -#else - u64 pkts : 48; + u64 pkts : 48; /* W7 */ u64 rsvd_511_496 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 drop_octs : 48; /* W8 */ u64 rsvd_575_560 : 16; - u64 drop_octs : 48; -#else - u64 drop_octs : 48; - u64 rsvd_575_560 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ - u64 rsvd_639_624 : 16; - u64 drop_pkts : 48; -#else - u64 drop_pkts : 48; + u64 drop_pkts : 48; /* W9 */ u64 rsvd_639_624 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 re_pkts : 48; /* W10 */ u64 rsvd_703_688 : 16; - u64 re_pkts : 48; -#else - u64 re_pkts : 48; - u64 rsvd_703_688 : 16; -#endif u64 rsvd_767_704; /* W11 */ u64 rsvd_831_768; /* W12 */ u64 rsvd_895_832; /* W13 */ @@ -711,30 +588,12 @@ enum nix_stype { /* NIX Send queue context structure */ struct nix_sq_ctx_s { -#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ - u64 sqe_way_mask : 16; - u64 cq : 20; - u64 sdp_mcast : 1; - u64 substream : 20; - u64 qint_idx : 6; - u64 ena : 1; -#else u64 ena : 1; u64 qint_idx : 6; u64 substream : 20; u64 sdp_mcast : 1; u64 cq : 20; u64 sqe_way_mask : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ - u64 sqb_count : 16; - u64 default_chan : 12; - u64 smq_rr_quantum : 24; - u64 sso_ena : 1; - u64 xoff : 1; - u64 cq_ena : 1; - u64 smq : 9; -#else u64 smq : 9; u64 cq_ena : 1; u64 xoff : 1; @@ -742,37 +601,12 @@ struct nix_sq_ctx_s { u64 smq_rr_quantum : 24; u64 default_chan : 12; u64 sqb_count : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ - u64 rsvd_191 : 1; - u64 sqe_stype : 2; - u64 sq_int_ena : 8; - u64 sq_int : 8; - u64 sqb_aura : 20; - u64 smq_rr_count : 25; -#else u64 smq_rr_count : 25; u64 sqb_aura : 20; u64 sq_int : 8; u64 sq_int_ena : 8; u64 sqe_stype : 2; u64 rsvd_191 : 1; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ - u64 rsvd_255_253 : 3; - u64 smq_next_sq_vld : 1; - u64 smq_pend : 1; - u64 smenq_next_sqb_vld : 1; - u64 head_offset : 6; - u64 smenq_offset : 6; - u64 tail_offset : 6; - u64 smq_lso_segnum : 8; 
- u64 smq_next_sq : 20; - u64 mnq_dis : 1; - u64 lmt_dis : 1; - u64 cq_limit : 8; - u64 max_sqe_size : 2; -#else u64 max_sqe_size : 2; u64 cq_limit : 8; u64 lmt_dis : 1; @@ -786,23 +620,11 @@ struct nix_sq_ctx_s { u64 smq_pend : 1; u64 smq_next_sq_vld : 1; u64 rsvd_255_253 : 3; -#endif u64 next_sqb : 64;/* W4 */ u64 tail_sqb : 64;/* W5 */ u64 smenq_sqb : 64;/* W6 */ u64 smenq_next_sqb : 64;/* W7 */ u64 head_sqb : 64;/* W8 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ - u64 rsvd_639_630 : 10; - u64 vfi_lso_vld : 1; - u64 vfi_lso_vlan1_ins_ena : 1; - u64 vfi_lso_vlan0_ins_ena : 1; - u64 vfi_lso_mps : 14; - u64 vfi_lso_sb : 8; - u64 vfi_lso_sizem1 : 3; - u64 vfi_lso_total : 18; - u64 rsvd_583_576 : 8; -#else u64 rsvd_583_576 : 8; u64 vfi_lso_total : 18; u64 vfi_lso_sizem1 : 3; @@ -812,68 +634,28 @@ struct nix_sq_ctx_s { u64 vfi_lso_vlan1_ins_ena : 1; u64 vfi_lso_vld : 1; u64 rsvd_639_630 : 10; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ - u64 rsvd_703_658 : 46; - u64 scm_lso_rem : 18; -#else u64 scm_lso_rem : 18; u64 rsvd_703_658 : 46; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */ - u64 rsvd_767_752 : 16; - u64 octs : 48; -#else u64 octs : 48; u64 rsvd_767_752 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */ - u64 rsvd_831_816 : 16; - u64 pkts : 48; -#else u64 pkts : 48; u64 rsvd_831_816 : 16; -#endif u64 rsvd_895_832 : 64;/* W13 */ -#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */ - u64 rsvd_959_944 : 16; - u64 dropped_octs : 48; -#else u64 dropped_octs : 48; u64 rsvd_959_944 : 16; -#endif -#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */ - u64 rsvd_1023_1008 : 16; - u64 dropped_pkts : 48; -#else u64 dropped_pkts : 48; u64 rsvd_1023_1008 : 16; -#endif }; /* NIX Receive side scaling entry structure*/ struct nix_rsse_s { -#if defined(__BIG_ENDIAN_BITFIELD) - uint32_t reserved_20_31 : 12; - uint32_t rq : 20; -#else uint32_t rq : 20; uint32_t reserved_20_31 : 12; -#endif }; /* NIX receive multicast/mirror entry structure */ struct nix_rx_mce_s { 
-#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ - uint64_t next : 16; - uint64_t pf_func : 16; - uint64_t rsvd_31_24 : 8; - uint64_t index : 20; - uint64_t eol : 1; - uint64_t rsvd_2 : 1; - uint64_t op : 2; -#else uint64_t op : 2; uint64_t rsvd_2 : 1; uint64_t eol : 1; @@ -881,7 +663,6 @@ struct nix_rx_mce_s { uint64_t rsvd_31_24 : 8; uint64_t pf_func : 16; uint64_t next : 16; -#endif }; enum nix_lsoalg { @@ -900,15 +681,6 @@ enum nix_txlayer { }; struct nix_lso_format { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 rsvd_19_63 : 45; - u64 alg : 3; - u64 rsvd_14_15 : 2; - u64 sizem1 : 2; - u64 rsvd_10_11 : 2; - u64 layer : 2; - u64 offset : 8; -#else u64 offset : 8; u64 layer : 2; u64 rsvd_10_11 : 2; @@ -916,24 +688,9 @@ struct nix_lso_format { u64 rsvd_14_15 : 2; u64 alg : 3; u64 rsvd_19_63 : 45; -#endif }; struct nix_rx_flowkey_alg { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 reserved_35_63 :29; - u64 ltype_match :4; - u64 ltype_mask :4; - u64 sel_chan :1; - u64 ena :1; - u64 reserved_24_24 :1; - u64 lid :3; - u64 bytesm1 :5; - u64 hdr_offset :8; - u64 fn_mask :1; - u64 ln_mask :1; - u64 key_offset :6; -#else u64 key_offset :6; u64 ln_mask :1; u64 fn_mask :1; @@ -946,7 +703,6 @@ struct nix_rx_flowkey_alg { u64 ltype_mask :4; u64 ltype_match :4; u64 reserved_35_63 :29; -#endif }; /* NIX VTAG size */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 4193ae3bde6b..745aa8a19499 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -3,11 +3,11 @@ # Makefile for Marvell's OcteonTX2 ethernet device drivers # -obj-$(CONFIG_OCTEONTX2_PF) += octeontx2_nicpf.o -obj-$(CONFIG_OCTEONTX2_VF) += octeontx2_nicvf.o +obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o +obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o -octeontx2_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o -octeontx2_nicvf-y := otx2_vf.o +rvu_nicpf-y := 
otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ + otx2_ptp.o otx2_flows.o cn10k.o +rvu_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c new file mode 100644 index 000000000000..9ec0313f13fc --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physcial Function ethernet driver + * + * Copyright (C) 2020 Marvell. + */ + +#include "cn10k.h" +#include "otx2_reg.h" +#include "otx2_struct.h" + +static struct dev_hw_ops otx2_hw_ops = { + .sq_aq_init = otx2_sq_aq_init, + .sqe_flush = otx2_sqe_flush, + .aura_freeptr = otx2_aura_freeptr, + .refill_pool_ptrs = otx2_refill_pool_ptrs, +}; + +static struct dev_hw_ops cn10k_hw_ops = { + .sq_aq_init = cn10k_sq_aq_init, + .sqe_flush = cn10k_sqe_flush, + .aura_freeptr = cn10k_aura_freeptr, + .refill_pool_ptrs = cn10k_refill_pool_ptrs, +}; + +int cn10k_pf_lmtst_init(struct otx2_nic *pf) +{ + int size, num_lines; + u64 base; + + if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { + pf->hw_ops = &otx2_hw_ops; + return 0; + } + + pf->hw_ops = &cn10k_hw_ops; + base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + + (MBOX_SIZE * (pf->total_vfs + 1)); + + size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) - + (MBOX_SIZE * (pf->total_vfs + 1)); + + pf->hw.lmt_base = ioremap(base, size); + + if (!pf->hw.lmt_base) { + dev_err(pf->dev, "Unable to map PF LMTST region\n"); + return -ENOMEM; + } + + /* FIXME: Get the num of LMTST lines from LMT table */ + pf->tot_lmt_lines = size / LMT_LINE_SIZE; + num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) / + pf->hw.tx_queues; + /* Number of LMT lines per SQ queues */ + pf->nix_lmt_lines = num_lines > 32 ? 
32 : num_lines; + + pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE; + return 0; +} + +int cn10k_vf_lmtst_init(struct otx2_nic *vf) +{ + int size, num_lines; + + if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) { + vf->hw_ops = &otx2_hw_ops; + return 0; + } + + vf->hw_ops = &cn10k_hw_ops; + size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM); + vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev, + PCI_MBOX_BAR_NUM), + size); + if (!vf->hw.lmt_base) { + dev_err(vf->dev, "Unable to map VF LMTST region\n"); + return -ENOMEM; + } + + vf->tot_lmt_lines = size / LMT_LINE_SIZE; + /* LMTST lines per SQ */ + num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) / + vf->hw.tx_queues; + vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; + vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE; + return 0; +} +EXPORT_SYMBOL(cn10k_vf_lmtst_init); + +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) +{ + struct nix_cn10k_aq_enq_req *aq; + struct otx2_nic *pfvf = dev; + struct otx2_snd_queue *sq; + + sq = &pfvf->qset.sq[qidx]; + sq->lmt_addr = (__force u64 *)((u64)pfvf->hw.nix_lmt_base + + (qidx * pfvf->nix_lmt_size)); + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + /* Only one SMQ is allocated, map all SQ's to that SMQ */ + aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/ + aq->sq.smq_rr_weight = pfvf->netdev->mtu; + aq->sq.default_chan = pfvf->hw.tx_chan_base; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. 
+ */ + aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt)); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +#define NPA_MAX_BURST 16 +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +{ + struct otx2_nic *pfvf = dev; + u64 ptrs[NPA_MAX_BURST]; + int num_ptrs = 1; + dma_addr_t bufptr; + + /* Refill pool with new buffers */ + while (cq->pool_ptrs) { + if (otx2_alloc_buffer(pfvf, cq, &bufptr)) { + if (num_ptrs--) + __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, + num_ptrs, + cq->rbpool->lmt_addr); + break; + } + cq->pool_ptrs--; + ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM; + num_ptrs++; + if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) { + __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, + num_ptrs, + cq->rbpool->lmt_addr); + num_ptrs = 1; + } + } +} + +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx) +{ + struct otx2_nic *pfvf = dev; + int lmt_id = NIX_LMTID_BASE + (qidx * pfvf->nix_lmt_lines); + u64 val = 0, tar_addr = 0; + + /* FIXME: val[0:10] LMT_ID. + * [12:15] no of LMTST - 1 in the burst. + * [19:63] data size of each LMTST in the burst except first. + */ + val = (lmt_id & 0x7FF); + /* Target address for LMTST flush tells HW how many 128bit + * words are present. + * tar_addr[6:4] size of first LMTST - 1 in units of 128b. + */ + tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4; + dma_wmb(); + memcpy(sq->lmt_addr, sq->sqe_base, size); + cn10k_lmt_flush(val, tar_addr); + + sq->head++; + sq->head &= (sq->sqe_cnt - 1); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h new file mode 100644 index 000000000000..e0bc595cbb78 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell. 
+ */ + +#ifndef CN10K_H +#define CN10K_H + +#include "otx2_common.h" + +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int cn10k_pf_lmtst_init(struct otx2_nic *pf); +int cn10k_vf_lmtst_init(struct otx2_nic *vf); +#endif /* CN10K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index cbd68fa9f1d6..cf7875d51d87 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -15,6 +15,7 @@ #include "otx2_reg.h" #include "otx2_common.h" #include "otx2_struct.h" +#include "cn10k.h" static void otx2_nix_rq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) @@ -60,6 +61,19 @@ void otx2_update_lmac_stats(struct otx2_nic *pfvf) mutex_unlock(&pfvf->mbox.lock); } +void otx2_update_lmac_fec_stats(struct otx2_nic *pfvf) +{ + struct msg_req *req; + + if (!netif_running(pfvf->netdev)) + return; + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_fec_stats(&pfvf->mbox); + if (req) + otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); +} + int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx) { struct otx2_rcv_queue *rq = &pfvf->qset.rq[qidx]; @@ -216,7 +230,6 @@ int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu) return -ENOMEM; } - pfvf->max_frs = mtu + OTX2_ETH_HLEN; req->maxlen = pfvf->max_frs; err = otx2_sync_mbox_msg(&pfvf->mbox); @@ -483,33 +496,54 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) (pfvf->hw.cq_ecount_wait - 1)); } -dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool) +int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { - dma_addr_t iova; u8 *buf; buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN); if (unlikely(!buf)) 
return -ENOMEM; - iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, + *dma = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - if (unlikely(dma_mapping_error(pfvf->dev, iova))) { + if (unlikely(dma_mapping_error(pfvf->dev, *dma))) { page_frag_free(buf); return -ENOMEM; } - return iova; + return 0; } -static dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool) +static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { - dma_addr_t addr; + int ret; local_bh_disable(); - addr = __otx2_alloc_rbuf(pfvf, pool); + ret = __otx2_alloc_rbuf(pfvf, pool, dma); local_bh_enable(); - return addr; + return ret; +} + +int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma) +{ + if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) { + struct refill_work *work; + struct delayed_work *dwork; + + work = &pfvf->refill_wrk[cq->cq_idx]; + dwork = &work->pool_refill_work; + /* Schedule a task if no other task is running */ + if (!cq->refill_task_sched) { + cq->refill_task_sched = true; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } + return -ENOMEM; + } + return 0; } void otx2_tx_timeout(struct net_device *netdev, unsigned int txq) @@ -571,8 +605,8 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) /* Set topology e.t.c configuration */ if (lvl == NIX_TXSCH_LVL_SMQ) { req->reg[0] = NIX_AF_SMQX_CFG(schq); - req->regval[0] = ((OTX2_MAX_MTU + OTX2_ETH_HLEN) << 8) | - OTX2_MIN_MTU; + req->regval[0] = ((pfvf->netdev->max_mtu + OTX2_ETH_HLEN) << 8) + | OTX2_MIN_MTU; req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | (0x2ULL << 36); @@ -714,9 +748,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) #define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */ #define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */ -/* Send skid of 2000 packets required for CQ size of 4K CQEs. 
*/ -#define SEND_CQ_SKID 2000 - static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) { struct otx2_qset *qset = &pfvf->qset; @@ -750,11 +781,48 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) return otx2_sync_mbox_msg(&pfvf->mbox); } +int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) +{ + struct otx2_nic *pfvf = dev; + struct otx2_snd_queue *sq; + struct nix_aq_enq_req *aq; + + sq = &pfvf->qset.sq[qidx]; + sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx)); + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + /* Only one SMQ is allocated, map all SQ's to that SMQ */ + aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + aq->sq.smq_rr_quantum = DFLT_RR_QTM; + aq->sq.default_chan = pfvf->hw.tx_chan_base; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. 
+ */ + aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt)); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) { struct otx2_qset *qset = &pfvf->qset; struct otx2_snd_queue *sq; - struct nix_aq_enq_req *aq; struct otx2_pool *pool; int err; @@ -791,40 +859,13 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->sqe_thresh = ((sq->num_sqbs * sq->sqe_per_sqb) * 10) / 100; sq->aura_id = sqb_aura; sq->aura_fc_addr = pool->fc_addr->base; - sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx)); sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0)); sq->stats.bytes = 0; sq->stats.pkts = 0; - /* Get memory to put this msg */ - aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); - if (!aq) - return -ENOMEM; - - aq->sq.cq = pfvf->hw.rx_queues + qidx; - aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ - aq->sq.cq_ena = 1; - aq->sq.ena = 1; - /* Only one SMQ is allocated, map all SQ's to that SMQ */ - aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - aq->sq.smq_rr_quantum = DFLT_RR_QTM; - aq->sq.default_chan = pfvf->hw.tx_chan_base; - aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ - aq->sq.sqb_aura = sqb_aura; - aq->sq.sq_int_ena = NIX_SQINT_BITS; - aq->sq.qint_idx = 0; - /* Due pipelining impact minimum 2000 unused SQ CQE's - * need to maintain to avoid CQ overflow. 
- */ - aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (sq->sqe_cnt)); + return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); - /* Fill AQ info */ - aq->qidx = qidx; - aq->ctype = NIX_AQ_CTYPE_SQ; - aq->op = NIX_AQ_INSTOP_INIT; - - return otx2_sync_mbox_msg(&pfvf->mbox); } static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) @@ -903,7 +944,7 @@ static void otx2_pool_refill_task(struct work_struct *work) struct refill_work *wrk; int qidx, free_ptrs = 0; struct otx2_nic *pfvf; - s64 bufptr; + dma_addr_t bufptr; wrk = container_of(work, struct refill_work, pool_refill_work.work); pfvf = wrk->pf; @@ -913,8 +954,7 @@ static void otx2_pool_refill_task(struct work_struct *work) free_ptrs = cq->pool_ptrs; while (cq->pool_ptrs) { - bufptr = otx2_alloc_rbuf(pfvf, rbpool); - if (bufptr <= 0) { + if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { /* Schedule a WQ if we fails to free atleast half of the * pointers else enable napi for this RQ. */ @@ -929,7 +969,7 @@ static void otx2_pool_refill_task(struct work_struct *work) } return; } - otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); + pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } cq->refill_task_sched = false; @@ -1173,6 +1213,11 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->rbsize = buf_size; + /* Set LMTST addr for NPA batch free */ + if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) + pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base + + (pool_id * LMT_LINE_SIZE)); + /* Initialize this pool's context via AF */ aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); if (!aq) { @@ -1213,8 +1258,8 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) struct otx2_hw *hw = &pfvf->hw; struct otx2_snd_queue *sq; struct otx2_pool *pool; + dma_addr_t bufptr; int err, ptr; - s64 bufptr; /* Calculate number of SQBs needed. 
* @@ -1259,10 +1304,9 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) return -ENOMEM; for (ptr = 0; ptr < num_sqbs; ptr++) { - bufptr = otx2_alloc_rbuf(pfvf, pool); - if (bufptr <= 0) - return bufptr; - otx2_aura_freeptr(pfvf, pool_id, bufptr); + if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) + return -ENOMEM; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } @@ -1280,7 +1324,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) int stack_pages, pool_id, rq; struct otx2_pool *pool; int err, ptr, num_ptrs; - s64 bufptr; + dma_addr_t bufptr; num_ptrs = pfvf->qset.rqe_cnt; @@ -1310,11 +1354,10 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; for (ptr = 0; ptr < num_ptrs; ptr++) { - bufptr = otx2_alloc_rbuf(pfvf, pool); - if (bufptr <= 0) - return bufptr; - otx2_aura_freeptr(pfvf, pool_id, - bufptr + OTX2_HEAD_ROOM); + if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) + return -ENOMEM; + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, + bufptr + OTX2_HEAD_ROOM); } } @@ -1491,6 +1534,13 @@ void mbox_handler_cgx_stats(struct otx2_nic *pfvf, pfvf->hw.cgx_tx_stats[id] = rsp->tx_stats[id]; } +void mbox_handler_cgx_fec_stats(struct otx2_nic *pfvf, + struct cgx_fec_stats_rsp *rsp) +{ + pfvf->hw.cgx_fec_corr_blks += rsp->fec_corr_blks; + pfvf->hw.cgx_fec_uncorr_blks += rsp->fec_uncorr_blks; +} + void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, struct nix_txsch_alloc_rsp *rsp) { @@ -1586,6 +1636,46 @@ void otx2_set_cints_affinity(struct otx2_nic *pfvf) } } +u16 otx2_get_max_mtu(struct otx2_nic *pfvf) +{ + struct nix_hw_info *rsp; + struct msg_req *req; + u16 max_mtu; + int rc; + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_nix_get_hw_info(&pfvf->mbox); + if (!req) { + rc = -ENOMEM; + goto out; + } + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + if (!rc) { + rsp = (struct nix_hw_info *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, 
&req->hdr); + + /* HW counts VLAN insertion bytes (8 for double tag) + * irrespective of whether SQE is requesting to insert VLAN + * in the packet or not. Hence these 8 bytes have to be + * discounted from max packet size otherwise HW will throw + * SMQ errors + */ + max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN; + } + +out: + mutex_unlock(&pfvf->mbox.lock); + if (rc) { + dev_warn(pfvf->dev, + "Failed to get MTU from hardware setting default value(1500)\n"); + max_mtu = 1500; + } + return max_mtu; +} +EXPORT_SYMBOL(otx2_get_max_mtu); + #define M(_name, _id, _fn_name, _req_type, _rsp_type) \ int __weak \ otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 143ae04c8ad5..4c472646a0ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -50,6 +50,9 @@ enum arua_mapped_qtypes { #define NIX_LF_ERR_VEC 0x81 #define NIX_LF_POISON_VEC 0x82 +/* Send skid of 2000 packets required for CQ size of 4K CQEs. */ +#define SEND_CQ_SKID 2000 + /* RSS configuration */ struct otx2_rss_ctx { u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; @@ -190,7 +193,6 @@ struct otx2_hw { u8 lso_tsov6_idx; u8 lso_udpv4_idx; u8 lso_udpv6_idx; - u8 hw_tso; /* MSI-X */ u8 cint_cnt; /* CQ interrupt count */ @@ -204,8 +206,20 @@ struct otx2_hw { struct otx2_drv_stats drv_stats; u64 cgx_rx_stats[CGX_RX_STATS_COUNT]; u64 cgx_tx_stats[CGX_TX_STATS_COUNT]; + u64 cgx_fec_corr_blks; + u64 cgx_fec_uncorr_blks; u8 cgx_links; /* No. of CGX links present in HW */ u8 lbk_links; /* No. 
of LBK links present in HW */ +#define HW_TSO BIT_ULL(0) +#define CN10K_MBOX BIT_ULL(1) +#define CN10K_LMTST BIT_ULL(2) + unsigned long cap_flag; + +#define LMT_LINE_SIZE 128 +#define NIX_LMTID_BASE 72 /* RX + TX + XDP */ + void __iomem *lmt_base; + u64 *npa_lmt_base; + u64 *nix_lmt_base; }; struct otx2_vf_config { @@ -264,9 +278,18 @@ struct otx2_flow_config { struct list_head flow_list; }; +struct dev_hw_ops { + int (*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura); + void (*sqe_flush)(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); + void (*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq); + void (*aura_freeptr)(void *dev, int aura, u64 buf); +}; + struct otx2_nic { void __iomem *reg_base; struct net_device *netdev; + struct dev_hw_ops *hw_ops; void *iommu_domain; u16 max_frs; u16 rbsize; /* Receive buffer size */ @@ -315,6 +338,10 @@ struct otx2_nic { /* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */ int nix_blkaddr; + /* LMTST Lines info */ + u16 tot_lmt_lines; + u16 nix_lmt_lines; + u32 nix_lmt_size; struct otx2_ptp *ptp; struct hwtstamp_config tstamp; @@ -339,6 +366,25 @@ static inline bool is_96xx_B0(struct pci_dev *pdev) (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX_RVU_PFVF); } +/* REVID for PCIe devices. 
+ * Bits 0..1: minor pass, bit 3..2: major pass + * bits 7..4: midr id + */ +#define PCI_REVISION_ID_96XX 0x00 +#define PCI_REVISION_ID_95XX 0x10 +#define PCI_REVISION_ID_LOKI 0x20 +#define PCI_REVISION_ID_98XX 0x30 +#define PCI_REVISION_ID_95XXMM 0x40 + +static inline bool is_dev_otx2(struct pci_dev *pdev) +{ + u8 midr = pdev->revision & 0xF0; + + return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX || + midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX || + midr == PCI_REVISION_ID_95XXMM); +} + static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) { struct otx2_hw *hw = &pfvf->hw; @@ -347,10 +393,10 @@ static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) pfvf->hw.cq_ecount_wait = CQ_CQE_THRESH_DEFAULT; pfvf->hw.cq_qcount_wait = CQ_QCOUNT_DEFAULT; - hw->hw_tso = true; + __set_bit(HW_TSO, &hw->cap_flag); if (is_96xx_A0(pfvf->pdev)) { - hw->hw_tso = false; + __clear_bit(HW_TSO, &hw->cap_flag); /* Time based irq coalescing is not supported */ pfvf->hw.cq_qcount_wait = 0x0; @@ -361,6 +407,10 @@ static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) pfvf->hw.rq_skid = 600; pfvf->qset.rqe_cnt = Q_COUNT(Q_SIZE_1K); } + if (!is_dev_otx2(pfvf->pdev)) { + __set_bit(CN10K_MBOX, &hw->cap_flag); + __set_bit(CN10K_LMTST, &hw->cap_flag); + } } /* Register read/write APIs */ @@ -469,10 +519,51 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) } #else -#define otx2_write128(lo, hi, addr) +#define otx2_write128(lo, hi, addr) writeq((hi) | (lo), addr) #define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) #endif +static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, + u64 *ptrs, u64 num_ptrs, + u64 *lmt_addr) +{ + u64 size = 0, count_eot = 0; + u64 tar_addr, val = 0; + + tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0); + /* LMTID is same as AURA Id */ + val = (aura & 0x7FF) | BIT_ULL(63); + /* Set if [127:64] of last 128bit word has a valid pointer */ + 
count_eot = (num_ptrs % 2) ? 0ULL : 1ULL; + /* Set AURA ID to free pointer */ + ptrs[0] = (count_eot << 32) | (aura & 0xFFFFF); + /* Target address for LMTST flush tells HW how many 128bit + * words are valid from NPA_LF_AURA_BATCH_FREE0. + * + * tar_addr[6:4] is LMTST size-1 in units of 128b. + */ + if (num_ptrs > 2) { + size = (sizeof(u64) * num_ptrs) / 16; + if (!count_eot) + size++; + tar_addr |= ((size - 1) & 0x7) << 4; + } + memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs); + /* Perform LMTST flush */ + cn10k_lmt_flush(val, tar_addr); +} + +static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) +{ + struct otx2_nic *pfvf = dev; + struct otx2_pool *pool; + u64 ptrs[2]; + + pool = &pfvf->qset.pool[aura]; + ptrs[1] = buf; + __cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr); +} + /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { @@ -484,11 +575,12 @@ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) } /* Free pointer to a pool/aura */ -static inline void otx2_aura_freeptr(struct otx2_nic *pfvf, - int aura, s64 buf) +static inline void otx2_aura_freeptr(void *dev, int aura, u64 buf) { - otx2_write128((u64)buf, (u64)aura | BIT_ULL(63), - otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0)); + struct otx2_nic *pfvf = dev; + void __iomem *addr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0); + + otx2_write128(buf, (u64)aura | BIT_ULL(63), addr); } static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx) @@ -636,12 +728,17 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl); int otx2_txsch_alloc(struct otx2_nic *pfvf); int otx2_txschq_stop(struct otx2_nic *pfvf); void otx2_sqb_flush(struct otx2_nic *pfvf); -dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool); +int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma); int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void 
otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); @@ -660,6 +757,9 @@ void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, struct nix_txsch_alloc_rsp *rsp); void mbox_handler_cgx_stats(struct otx2_nic *pfvf, struct cgx_stats_rsp *rsp); +void mbox_handler_cgx_fec_stats(struct otx2_nic *pfvf, + struct cgx_fec_stats_rsp *rsp); +void otx2_set_fec_stats_count(struct otx2_nic *pfvf); void mbox_handler_nix_bp_enable(struct otx2_nic *pfvf, struct nix_bp_cfg_rsp *rsp); @@ -668,6 +768,7 @@ void otx2_get_dev_stats(struct otx2_nic *pfvf); void otx2_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats); void otx2_update_lmac_stats(struct otx2_nic *pfvf); +void otx2_update_lmac_fec_stats(struct otx2_nic *pfvf); int otx2_update_rq_stats(struct otx2_nic *pfvf, int qidx); int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx); void otx2_set_ethtool_ops(struct net_device *netdev); @@ -697,5 +798,5 @@ int otx2_del_macfilter(struct net_device *netdev, const u8 *mac); int otx2_add_macfilter(struct net_device *netdev, const u8 *mac); int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); - +u16 otx2_get_max_mtu(struct otx2_nic *pfvf); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index e0199f0e4a6c..237e5d3321d4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ 
b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -14,6 +14,7 @@ #include <linux/etherdevice.h> #include <linux/log2.h> #include <linux/net_tstamp.h> +#include <linux/linkmode.h> #include "otx2_common.h" #include "otx2_ptp.h" @@ -32,6 +33,14 @@ struct otx2_stat { .index = offsetof(struct otx2_dev_stats, stat) / sizeof(u64), \ } +/* Physical link config */ +#define OTX2_ETHTOOL_SUPPORTED_MODES 0x638CCBF //110001110001100110010111111 + +enum link_mode { + OTX2_MODE_SUPPORTED, + OTX2_MODE_ADVERTISED +}; + static const struct otx2_stat otx2_dev_stats[] = { OTX2_DEV_STAT(rx_ucast_frames), OTX2_DEV_STAT(rx_bcast_frames), @@ -66,6 +75,8 @@ static const unsigned int otx2_n_dev_stats = ARRAY_SIZE(otx2_dev_stats); static const unsigned int otx2_n_drv_stats = ARRAY_SIZE(otx2_drv_stats); static const unsigned int otx2_n_queue_stats = ARRAY_SIZE(otx2_queue_stats); +static struct cgx_fw_data *otx2_get_fwdata(struct otx2_nic *pfvf); + static void otx2_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -128,6 +139,10 @@ static void otx2_get_strings(struct net_device *netdev, u32 sset, u8 *data) strcpy(data, "reset_count"); data += ETH_GSTRING_LEN; + sprintf(data, "Fec Corrected Errors: "); + data += ETH_GSTRING_LEN; + sprintf(data, "Fec Uncorrected Errors: "); + data += ETH_GSTRING_LEN; } static void otx2_get_qset_stats(struct otx2_nic *pfvf, @@ -160,11 +175,30 @@ static void otx2_get_qset_stats(struct otx2_nic *pfvf, } } +static int otx2_get_phy_fec_stats(struct otx2_nic *pfvf) +{ + struct msg_req *req; + int rc = -ENOMEM; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_get_phy_fec_stats(&pfvf->mbox); + if (!req) + goto end; + + if (!otx2_sync_mbox_msg(&pfvf->mbox)) + rc = 0; +end: + mutex_unlock(&pfvf->mbox.lock); + return rc; +} + /* Get device and per queue statistics */ static void otx2_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct otx2_nic *pfvf = netdev_priv(netdev); + u64 
fec_corr_blks, fec_uncorr_blks; + struct cgx_fw_data *rsp; int stat; otx2_get_dev_stats(pfvf); @@ -183,6 +217,32 @@ static void otx2_get_ethtool_stats(struct net_device *netdev, for (stat = 0; stat < CGX_TX_STATS_COUNT; stat++) *(data++) = pfvf->hw.cgx_tx_stats[stat]; *(data++) = pfvf->reset_count; + + fec_corr_blks = pfvf->hw.cgx_fec_corr_blks; + fec_uncorr_blks = pfvf->hw.cgx_fec_uncorr_blks; + + rsp = otx2_get_fwdata(pfvf); + if (!IS_ERR(rsp) && rsp->fwdata.phy.misc.has_fec_stats && + !otx2_get_phy_fec_stats(pfvf)) { + /* Fetch fwdata again because it's been recently populated with + * latest PHY FEC stats. + */ + rsp = otx2_get_fwdata(pfvf); + if (!IS_ERR(rsp)) { + struct fec_stats_s *p = &rsp->fwdata.phy.fec_stats; + + if (pfvf->linfo.fec == OTX2_FEC_BASER) { + fec_corr_blks = p->brfec_corr_blks; + fec_uncorr_blks = p->brfec_uncorr_blks; + } else { + fec_corr_blks = p->rsfec_corr_cws; + fec_uncorr_blks = p->rsfec_uncorr_cws; + } + } + } + + *(data++) = fec_corr_blks; + *(data++) = fec_uncorr_blks; } static int otx2_get_sset_count(struct net_device *netdev, int sset) @@ -195,9 +255,11 @@ static int otx2_get_sset_count(struct net_device *netdev, int sset) qstats_count = otx2_n_queue_stats * (pfvf->hw.rx_queues + pfvf->hw.tx_queues); + otx2_update_lmac_fec_stats(pfvf); return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + - CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT + 1; + CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT + OTX2_FEC_STATS_CNT + + 1; } /* Get no of queues device supports and current queue count */ @@ -859,6 +921,304 @@ static int otx2_get_ts_info(struct net_device *netdev, return 0; } +static struct cgx_fw_data *otx2_get_fwdata(struct otx2_nic *pfvf) +{ + struct cgx_fw_data *rsp = NULL; + struct msg_req *req; + int err = 0; + + mutex_lock(&pfvf->mbox.lock); + req = otx2_mbox_alloc_msg_cgx_get_aux_link_info(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return ERR_PTR(-ENOMEM); + } + + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (!err) { 
+ rsp = (struct cgx_fw_data *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + } else { + rsp = ERR_PTR(err); + } + + mutex_unlock(&pfvf->mbox.lock); + return rsp; +} + +static int otx2_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct cgx_fw_data *rsp; + const int fec[] = { + ETHTOOL_FEC_OFF, + ETHTOOL_FEC_BASER, + ETHTOOL_FEC_RS, + ETHTOOL_FEC_BASER | ETHTOOL_FEC_RS}; +#define FEC_MAX_INDEX 4 + if (pfvf->linfo.fec < FEC_MAX_INDEX) + fecparam->active_fec = fec[pfvf->linfo.fec]; + + rsp = otx2_get_fwdata(pfvf); + if (IS_ERR(rsp)) + return PTR_ERR(rsp); + + /* fec[] holds FEC_MAX_INDEX entries, so a valid index is strictly + * below FEC_MAX_INDEX; '<=' would read one element past the array. + */ + if (rsp->fwdata.supported_fec < FEC_MAX_INDEX) { + if (!rsp->fwdata.supported_fec) + fecparam->fec = ETHTOOL_FEC_NONE; + else + fecparam->fec = fec[rsp->fwdata.supported_fec]; + } + return 0; +} + +static int otx2_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct mbox *mbox = &pfvf->mbox; + struct fec_mode *req, *rsp; + int err = 0, fec = 0; + + switch (fecparam->fec) { + /* Firmware does not support AUTO mode consider it as FEC_OFF */ + case ETHTOOL_FEC_OFF: + case ETHTOOL_FEC_AUTO: + fec = OTX2_FEC_OFF; + break; + case ETHTOOL_FEC_RS: + fec = OTX2_FEC_RS; + break; + case ETHTOOL_FEC_BASER: + fec = OTX2_FEC_BASER; + break; + default: + netdev_warn(pfvf->netdev, "Unsupported FEC mode: %d", + fecparam->fec); + return -EINVAL; + } + + if (fec == pfvf->linfo.fec) + return 0; + + mutex_lock(&mbox->lock); + req = otx2_mbox_alloc_msg_cgx_set_fec_param(&pfvf->mbox); + if (!req) { + err = -ENOMEM; + goto end; + } + req->fec = fec; + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto end; + + rsp = (struct fec_mode *)otx2_mbox_get_rsp(&pfvf->mbox.mbox, + 0, &req->hdr); + if (rsp->fec >= 0) + pfvf->linfo.fec = rsp->fec; + else + err = rsp->fec; +end: + mutex_unlock(&mbox->lock); + return err; +} + +static void otx2_get_fec_info(u64 index,
int req_mode, + struct ethtool_link_ksettings *link_ksettings) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(otx2_fec_modes) = { 0, }; + + switch (index) { + case OTX2_FEC_NONE: + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, + otx2_fec_modes); + break; + case OTX2_FEC_BASER: + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + otx2_fec_modes); + break; + case OTX2_FEC_RS: + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + otx2_fec_modes); + break; + case OTX2_FEC_BASER | OTX2_FEC_RS: + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, + otx2_fec_modes); + linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, + otx2_fec_modes); + break; + } + + /* Add fec modes to existing modes */ + if (req_mode == OTX2_MODE_ADVERTISED) + linkmode_or(link_ksettings->link_modes.advertising, + link_ksettings->link_modes.advertising, + otx2_fec_modes); + else + linkmode_or(link_ksettings->link_modes.supported, + link_ksettings->link_modes.supported, + otx2_fec_modes); +} + +static void otx2_get_link_mode_info(u64 link_mode_bmap, + bool req_mode, + struct ethtool_link_ksettings + *link_ksettings) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(otx2_link_modes) = { 0, }; + const int otx2_sgmii_features[6] = { + ETHTOOL_LINK_MODE_10baseT_Half_BIT, + ETHTOOL_LINK_MODE_10baseT_Full_BIT, + ETHTOOL_LINK_MODE_100baseT_Half_BIT, + ETHTOOL_LINK_MODE_100baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + }; + /* CGX link modes to Ethtool link mode mapping */ + const int cgx_link_mode[27] = { + 0, /* SGMII Mode */ + ETHTOOL_LINK_MODE_1000baseX_Full_BIT, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + 0, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + 0, + 0, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + 
ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + 0, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + 0, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + 0, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT + }; + u8 bit; + + link_mode_bmap = link_mode_bmap & OTX2_ETHTOOL_SUPPORTED_MODES; + + for_each_set_bit(bit, (unsigned long *)&link_mode_bmap, 27) { + /* SGMII mode is set */ + if (bit == 0) + linkmode_set_bit_array(otx2_sgmii_features, + ARRAY_SIZE(otx2_sgmii_features), + otx2_link_modes); + else + linkmode_set_bit(cgx_link_mode[bit], otx2_link_modes); + } + + if (req_mode == OTX2_MODE_ADVERTISED) + linkmode_copy(link_ksettings->link_modes.advertising, + otx2_link_modes); + else + linkmode_copy(link_ksettings->link_modes.supported, + otx2_link_modes); +} + +static int otx2_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct cgx_fw_data *rsp = NULL; + + cmd->base.duplex = pfvf->linfo.full_duplex; + cmd->base.speed = pfvf->linfo.speed; + cmd->base.autoneg = pfvf->linfo.an; + + rsp = otx2_get_fwdata(pfvf); + if (IS_ERR(rsp)) + return PTR_ERR(rsp); + + if (rsp->fwdata.supported_an) + ethtool_link_ksettings_add_link_mode(cmd, + supported, + Autoneg); + + otx2_get_link_mode_info(rsp->fwdata.advertised_link_modes, + OTX2_MODE_ADVERTISED, cmd); + otx2_get_fec_info(rsp->fwdata.advertised_fec, + OTX2_MODE_ADVERTISED, cmd); + otx2_get_link_mode_info(rsp->fwdata.supported_link_modes, + OTX2_MODE_SUPPORTED, cmd); + otx2_get_fec_info(rsp->fwdata.supported_fec, + OTX2_MODE_SUPPORTED, cmd); + return 0; +} + +static void otx2_get_advertised_mode(const struct ethtool_link_ksettings *cmd, + u64 *mode) +{ + u32 bit_pos; + + /* Firmware does not 
support requesting multiple advertised modes + * return first set bit + */ + bit_pos = find_first_bit(cmd->link_modes.advertising, + __ETHTOOL_LINK_MODE_MASK_NBITS); + if (bit_pos != __ETHTOOL_LINK_MODE_MASK_NBITS) + *mode = bit_pos; +} + +static int otx2_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct otx2_nic *pf = netdev_priv(netdev); + struct ethtool_link_ksettings cur_ks; + struct cgx_set_link_mode_req *req; + struct mbox *mbox = &pf->mbox; + int err = 0; + + memset(&cur_ks, 0, sizeof(struct ethtool_link_ksettings)); + + if (!ethtool_validate_speed(cmd->base.speed) || + !ethtool_validate_duplex(cmd->base.duplex)) + return -EINVAL; + + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) + return -EINVAL; + + otx2_get_link_ksettings(netdev, &cur_ks); + + /* Check requested modes against supported modes by hardware */ + if (!bitmap_subset(cmd->link_modes.advertising, + cur_ks.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; + + mutex_lock(&mbox->lock); + req = otx2_mbox_alloc_msg_cgx_set_link_mode(&pf->mbox); + if (!req) { + err = -ENOMEM; + goto end; + } + + req->args.speed = cmd->base.speed; + /* firmware expects 1 for half duplex and 0 for full duplex + * hence inverting + */ + req->args.duplex = cmd->base.duplex ^ 0x1; + req->args.an = cmd->base.autoneg; + otx2_get_advertised_mode(cmd, &req->args.mode); + + err = otx2_sync_mbox_msg(&pf->mbox); +end: + mutex_unlock(&mbox->lock); + return err; +} + static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -886,6 +1246,10 @@ static const struct ethtool_ops otx2_ethtool_ops = { .get_pauseparam = otx2_get_pauseparam, .set_pauseparam = otx2_set_pauseparam, .get_ts_info = otx2_get_ts_info, + .get_fecparam = otx2_get_fecparam, + .set_fecparam = otx2_set_fecparam, + .get_link_ksettings = otx2_get_link_ksettings, + 
.set_link_ksettings = otx2_set_link_ksettings, }; void otx2_set_ethtool_ops(struct net_device *netdev) @@ -960,6 +1324,20 @@ static int otx2vf_get_sset_count(struct net_device *netdev, int sset) return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + 1; } +static int otx2vf_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + + if (is_otx2_lbkvf(pfvf->pdev)) { + cmd->base.duplex = DUPLEX_FULL; + cmd->base.speed = SPEED_100000; + } else { + return otx2_get_link_ksettings(netdev, cmd); + } + return 0; +} + static const struct ethtool_ops otx2vf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -986,6 +1364,7 @@ static const struct ethtool_ops otx2vf_ethtool_ops = { .set_msglevel = otx2_set_msglevel, .get_pauseparam = otx2_get_pauseparam, .set_pauseparam = otx2_set_pauseparam, + .get_link_ksettings = otx2vf_get_link_ksettings, }; void otx2vf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 07ec85aebcca..53ab1814d74b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -22,10 +22,11 @@ #include "otx2_txrx.h" #include "otx2_struct.h" #include "otx2_ptp.h" +#include "cn10k.h" #include <rvu_trace.h> -#define DRV_NAME "octeontx2-nicpf" -#define DRV_STRING "Marvell OcteonTX2 NIC Physical Function Driver" +#define DRV_NAME "rvu_nicpf" +#define DRV_STRING "Marvell RVU NIC Physical Function Driver" /* Supported devices */ static const struct pci_device_id otx2_pf_id_table[] = { @@ -585,9 +586,17 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf_wq) return -ENOMEM; - base = readq((void __iomem *)((u64)pf->reg_base + RVU_PF_VF_BAR4_ADDR)); - hwbase = ioremap_wc(base, MBOX_SIZE * pf->total_vfs); + /* On CN10K 
platform, PF <-> VF mailbox region follows after + * PF <-> AF mailbox region. + */ + if (test_bit(CN10K_MBOX, &pf->hw.cap_flag)) + base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + + MBOX_SIZE; + else + base = readq((void __iomem *)((u64)pf->reg_base + + RVU_PF_VF_BAR4_ADDR)); + hwbase = ioremap_wc(base, MBOX_SIZE * pf->total_vfs); if (!hwbase) { err = -ENOMEM; goto free_wq; @@ -779,6 +788,9 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, case MBOX_MSG_CGX_STATS: mbox_handler_cgx_stats(pf, (struct cgx_stats_rsp *)msg); break; + case MBOX_MSG_CGX_FEC_STATS: + mbox_handler_cgx_fec_stats(pf, (struct cgx_fec_stats_rsp *)msg); + break; default: if (msg->rc) dev_err(pf->dev, @@ -1039,7 +1051,7 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) * device memory to allow unaligned accesses. */ hwbase = ioremap_wc(pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM), - pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM)); + MBOX_SIZE); if (!hwbase) { dev_err(pf->dev, "Unable to map PFAF mailbox region\n"); err = -ENOMEM; @@ -1276,6 +1288,33 @@ static void otx2_free_sq_res(struct otx2_nic *pf) } } +static int otx2_get_rbuf_size(struct otx2_nic *pf, int mtu) +{ + int frame_size; + int total_size; + int rbuf_size; + + /* The data transferred by NIX to memory consists of actual packet + * plus additional data which has timestamp and/or EDSA/HIGIG2 + * headers if interface is configured in corresponding modes. + * NIX transfers entire data using 6 segments/buffers and writes + * a CQE_RX descriptor with those segment addresses. First segment + * has additional data prepended to packet. Also software omits a + * headroom of 128 bytes and sizeof(struct skb_shared_info) in + * each segment. 
Hence the total size of memory needed + * to receive a packet with 'mtu' is: + * frame size = mtu + additional data; + * memory = frame_size + (headroom + struct skb_shared_info size) * 6; + * each receive buffer size = memory / 6; + */ + frame_size = mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + total_size = frame_size + (OTX2_HEAD_ROOM + + OTX2_DATA_ALIGN(sizeof(struct skb_shared_info))) * 6; + rbuf_size = total_size / 6; + + return ALIGN(rbuf_size, 2048); +} + static int otx2_init_hw_resources(struct otx2_nic *pf) { struct nix_lf_free_req *free_req; @@ -1292,9 +1331,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) hw->sqpool_cnt = hw->tx_queues; hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; - /* Get the size of receive buffers to allocate */ - pf->rbsize = RCV_FRAG_LEN(OTX2_HW_TIMESTAMP_LEN + pf->netdev->mtu + - OTX2_ETH_HLEN); + pf->max_frs = pf->netdev->mtu + OTX2_ETH_HLEN + OTX2_HW_TIMESTAMP_LEN; + + pf->rbsize = otx2_get_rbuf_size(pf, pf->netdev->mtu); mutex_lock(&mbox->lock); /* NPA init */ @@ -1487,6 +1526,14 @@ int otx2_open(struct net_device *netdev) if (!qset->rq) goto err_free_mem; + if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { + /* Reserve LMT lines for NPA AURA batch free */ + pf->hw.npa_lmt_base = (__force u64 *)pf->hw.lmt_base; + /* Reserve LMT lines for NIX TX */ + pf->hw.nix_lmt_base = (__force u64 *)((u64)pf->hw.npa_lmt_base + + (NIX_LMTID_BASE * LMT_LINE_SIZE)); + } + err = otx2_init_hw_resources(pf); if (err) goto err_free_mem; @@ -2325,6 +2372,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_free_netdev; } + otx2_setup_dev_hw_settings(pf); + /* Init PF <=> AF mailbox stuff */ err = otx2_pfaf_mbox_init(pf); if (err) @@ -2350,7 +2399,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - otx2_setup_dev_hw_settings(pf); + err = cn10k_pf_lmtst_init(pf); + if (err) + goto err_detach_rsrc; /* Assign default mac address */ 
otx2_get_mac_from_af(netdev); @@ -2405,7 +2456,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* MTU range: 64 - 9190 */ netdev->min_mtu = OTX2_MIN_MTU; - netdev->max_mtu = OTX2_MAX_MTU; + netdev->max_mtu = otx2_get_max_mtu(pf); err = register_netdev(netdev); if (err) { @@ -2435,6 +2486,8 @@ err_del_mcam_entries: err_ptp_destroy: otx2_ptp_destroy(pf); err_detach_rsrc: + if (hw->lmt_base) + iounmap(hw->lmt_base); otx2_detach_resources(&pf->mbox); err_disable_mbox_intr: otx2_disable_mbox_intr(pf); @@ -2594,6 +2647,9 @@ static void otx2_remove(struct pci_dev *pdev) otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); otx2_detach_resources(&pf->mbox); + if (pf->hw.lmt_base) + iounmap(pf->hw.lmt_base); + otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); pci_free_irq_vectors(pf->pdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 867f646e0802..21b811c6ee0f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -44,6 +44,8 @@ #define RVU_PF_MSIX_VECX_ADDR(a) (0x000 | (a) << 4) #define RVU_PF_MSIX_VECX_CTL(a) (0x008 | (a) << 4) #define RVU_PF_MSIX_PBAX(a) (0xF0000 | (a) << 3) +#define RVU_PF_VF_MBOX_ADDR (0xC40) +#define RVU_PF_LMTLINE_ADDR (0xC48) /* RVU VF registers */ #define RVU_VF_VFPF_MBOX0 (0x00000) @@ -57,6 +59,7 @@ #define RVU_VF_MSIX_VECX_ADDR(a) (0x000 | (a) << 4) #define RVU_VF_MSIX_VECX_CTL(a) (0x008 | (a) << 4) #define RVU_VF_MSIX_PBAX(a) (0xF0000 | (a) << 3) +#define RVU_VF_MBOX_REGION (0xC0000) #define RVU_FUNC_BLKADDR_SHIFT 20 #define RVU_FUNC_BLKADDR_MASK 0x1FULL @@ -91,6 +94,7 @@ #define NPA_LF_QINTX_INT_W1S(a) (NPA_LFBASE | 0x318 | (a) << 12) #define NPA_LF_QINTX_ENA_W1S(a) (NPA_LFBASE | 0x320 | (a) << 12) #define NPA_LF_QINTX_ENA_W1C(a) (NPA_LFBASE | 0x330 | (a) << 12) +#define NPA_LF_AURA_BATCH_FREE0 (NPA_LFBASE | 0x400) /* NIX LF registers */ #define NIX_LFBASE 
(BLKTYPE_NIX << RVU_FUNC_BLKADDR_SHIFT) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h index cba59ddf71bb..1f49b3caf5d4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h @@ -142,7 +142,9 @@ struct nix_rx_parse_s { u64 vtag0_ptr : 8; /* W5 */ u64 vtag1_ptr : 8; u64 flow_key_alg : 5; - u64 rsvd_383_341 : 43; + u64 rsvd_359_341 : 19; + u64 color : 2; + u64 rsvd_383_362 : 22; u64 rsvd_447_384; /* W6 */ }; @@ -218,7 +220,8 @@ struct nix_sqe_ext_s { u64 vlan1_ins_tci : 16; u64 vlan0_ins_ena : 1; u64 vlan1_ins_ena : 1; - u64 rsvd_127_114 : 14; + u64 init_color : 2; + u64 rsvd_127_116 : 12; }; struct nix_sqe_sg_s { @@ -237,7 +240,8 @@ struct nix_sqe_sg_s { /* NIX send memory subdescriptor structure */ struct nix_sqe_mem_s { u64 offset : 16; /* W0 */ - u64 rsvd_52_16 : 37; + u64 rsvd_51_16 : 36; + u64 per_lso_seg : 1; u64 wmem : 1; u64 dsz : 2; u64 alg : 4; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index d0e25414f1a1..3f778fc054b5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -17,6 +17,7 @@ #include "otx2_struct.h" #include "otx2_txrx.h" #include "otx2_ptp.h" +#include "cn10k.h" #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) @@ -199,7 +200,8 @@ static void otx2_free_rcv_seg(struct otx2_nic *pfvf, struct nix_cqe_rx_s *cqe, sg = (struct nix_rx_sg_s *)start; seg_addr = &sg->seg_addr; for (seg = 0; seg < sg->segs; seg++, seg_addr++) - otx2_aura_freeptr(pfvf, qidx, *seg_addr & ~0x07ULL); + pfvf->hw_ops->aura_freeptr(pfvf, qidx, + *seg_addr & ~0x07ULL); start += sizeof(*sg); } } @@ -255,12 +257,11 @@ static bool otx2_check_rcv_errors(struct otx2_nic *pfvf, /* For now ignore all the NPC parser errors and * pass the packets to 
stack. */ - if (cqe->sg.segs == 1) - return false; + return false; } /* If RXALL is enabled pass on packets to stack. */ - if (cqe->sg.segs == 1 && (pfvf->netdev->features & NETIF_F_RXALL)) + if (pfvf->netdev->features & NETIF_F_RXALL) return false; /* Free buffer back to pool */ @@ -275,9 +276,14 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, struct nix_cqe_rx_s *cqe) { struct nix_rx_parse_s *parse = &cqe->parse; + struct nix_rx_sg_s *sg = &cqe->sg; struct sk_buff *skb = NULL; + void *end, *start; + u64 *seg_addr; + u16 *seg_size; + int seg; - if (unlikely(parse->errlev || parse->errcode || cqe->sg.segs > 1)) { + if (unlikely(parse->errlev || parse->errcode)) { if (otx2_check_rcv_errors(pfvf, cqe, cq->cq_idx)) return; } @@ -286,9 +292,19 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, if (unlikely(!skb)) return; - otx2_skb_add_frag(pfvf, skb, cqe->sg.seg_addr, cqe->sg.seg_size, parse); - cq->pool_ptrs++; - + start = (void *)sg; + end = start + ((cqe->parse.desc_sizem1 + 1) * 16); + while (start < end) { + sg = (struct nix_rx_sg_s *)start; + seg_addr = &sg->seg_addr; + seg_size = (void *)sg; + for (seg = 0; seg < sg->segs; seg++, seg_addr++) { + otx2_skb_add_frag(pfvf, skb, *seg_addr, seg_size[seg], + parse); + cq->pool_ptrs++; + } + start += sizeof(*sg); + } otx2_set_rxhash(pfvf, cqe, skb); skb_record_rx_queue(skb, cq->cq_idx); @@ -304,7 +320,6 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, { struct nix_cqe_rx_s *cqe; int processed_cqe = 0; - s64 bufptr; while (likely(processed_cqe < budget)) { cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head); @@ -330,29 +345,23 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, if (unlikely(!cq->pool_ptrs)) return 0; - /* Refill pool with new buffers */ + pfvf->hw_ops->refill_pool_ptrs(pfvf, cq); + + return processed_cqe; +} + +void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +{ + struct otx2_nic *pfvf = dev; + dma_addr_t bufptr; + while (cq->pool_ptrs) { - bufptr = 
__otx2_alloc_rbuf(pfvf, cq->rbpool); - if (unlikely(bufptr <= 0)) { - struct refill_work *work; - struct delayed_work *dwork; - - work = &pfvf->refill_wrk[cq->cq_idx]; - dwork = &work->pool_refill_work; - /* Schedule a task if no other task is running */ - if (!cq->refill_task_sched) { - cq->refill_task_sched = true; - schedule_delayed_work(dwork, - msecs_to_jiffies(100)); - } + if (otx2_alloc_buffer(pfvf, cq, &bufptr)) break; - } otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } - - return processed_cqe; } static int otx2_tx_napi_handler(struct otx2_nic *pfvf, @@ -439,7 +448,8 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) return workdone; } -static void otx2_sqe_flush(struct otx2_snd_queue *sq, int size) +void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx) { u64 status; @@ -797,7 +807,7 @@ static void otx2_sq_append_tso(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->sizem1 = (offset / 16) - 1; /* Flush SQE to HW */ - otx2_sqe_flush(sq, offset); + pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); } } @@ -806,8 +816,6 @@ static bool is_hw_tso_supported(struct otx2_nic *pfvf, { int payload_len, last_seg_size; - if (!pfvf->hw.hw_tso) - return false; /* HW has an issue due to which when the payload of the last LSO * segment is shorter than 16 bytes, some header fields may not @@ -821,6 +829,9 @@ static bool is_hw_tso_supported(struct otx2_nic *pfvf, if (last_seg_size && last_seg_size < 16) return false; + if (!test_bit(HW_TSO, &pfvf->hw.cap_flag)) + return false; + return true; } @@ -915,7 +926,7 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, netdev_tx_sent_queue(txq, skb->len); /* Flush SQE to HW */ - otx2_sqe_flush(sq, offset); + pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); return true; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 73af15685657..52486c1f0973 
100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -24,7 +24,6 @@ #define OTX2_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN) #define OTX2_MIN_MTU 64 -#define OTX2_MAX_MTU (9212 - OTX2_ETH_HLEN) #define OTX2_MAX_GSO_SEGS 255 #define OTX2_MAX_FRAGS_IN_SQE 9 @@ -114,6 +113,7 @@ struct otx2_cq_poll { struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; + u64 *lmt_addr; u16 rbsize; }; @@ -156,4 +156,10 @@ static inline u64 otx2_iova_to_phys(void *iommu_domain, dma_addr_t dma_addr) int otx2_napi_handler(struct napi_struct *napi, int budget); bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, struct sk_buff *skb, u16 qidx); +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); +void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); +void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); #endif /* OTX2_TXRX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index d3e4cfd244e2..085be90a03eb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -7,9 +7,10 @@ #include "otx2_common.h" #include "otx2_reg.h" +#include "cn10k.h" -#define DRV_NAME "octeontx2-nicvf" -#define DRV_STRING "Marvell OcteonTX2 NIC Virtual Function Driver" +#define DRV_NAME "rvu_nicvf" +#define DRV_STRING "Marvell RVU NIC Virtual Function Driver" static const struct pci_device_id otx2_vf_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_RVU_AFVF) }, @@ -277,7 +278,7 @@ static void otx2vf_vfaf_mbox_destroy(struct otx2_nic *vf) vf->mbox_wq = NULL; } - if (mbox->mbox.hwbase) + if (mbox->mbox.hwbase && !test_bit(CN10K_MBOX, &vf->hw.cap_flag)) iounmap((void __iomem *)mbox->mbox.hwbase); 
otx2_mbox_destroy(&mbox->mbox); @@ -297,16 +298,25 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) if (!vf->mbox_wq) return -ENOMEM; - /* Mailbox is a reserved memory (in RAM) region shared between - * admin function (i.e PF0) and this VF, shouldn't be mapped as - * device memory to allow unaligned accesses. - */ - hwbase = ioremap_wc(pci_resource_start(vf->pdev, PCI_MBOX_BAR_NUM), - pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM)); - if (!hwbase) { - dev_err(vf->dev, "Unable to map VFAF mailbox region\n"); - err = -ENOMEM; - goto exit; + if (test_bit(CN10K_MBOX, &vf->hw.cap_flag)) { + /* For cn10k platform, VF mailbox region is in its BAR2 + * register space + */ + hwbase = vf->reg_base + RVU_VF_MBOX_REGION; + } else { + /* Mailbox is a reserved memory (in RAM) region shared between + * admin function (i.e PF0) and this VF, shouldn't be mapped as + * device memory to allow unaligned accesses. + */ + hwbase = ioremap_wc(pci_resource_start(vf->pdev, + PCI_MBOX_BAR_NUM), + pci_resource_len(vf->pdev, + PCI_MBOX_BAR_NUM)); + if (!hwbase) { + dev_err(vf->dev, "Unable to map VFAF mailbox region\n"); + err = -ENOMEM; + goto exit; + } } err = otx2_mbox_init(&mbox->mbox, hwbase, vf->pdev, vf->reg_base, @@ -329,6 +339,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) return 0; exit: + if (hwbase && !test_bit(CN10K_MBOX, &vf->hw.cap_flag)) + iounmap(hwbase); destroy_workqueue(vf->mbox_wq); return err; } @@ -525,6 +537,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_free_irq_vectors; } + otx2_setup_dev_hw_settings(vf); /* Init VF <=> PF mailbox stuff */ err = otx2vf_vfaf_mbox_init(vf); if (err) @@ -548,7 +561,9 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - otx2_setup_dev_hw_settings(vf); + err = cn10k_vf_lmtst_init(vf); + if (err) + goto err_detach_rsrc; /* Assign default mac address */ otx2_get_mac_from_af(netdev); @@ -571,7 +586,7 @@ static int 
otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* MTU range: 68 - 9190 */ netdev->min_mtu = OTX2_MIN_MTU; - netdev->max_mtu = OTX2_MAX_MTU; + netdev->max_mtu = otx2_get_max_mtu(vf); INIT_WORK(&vf->reset_task, otx2vf_reset_task); @@ -600,6 +615,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_detach_rsrc: + if (hw->lmt_base) + iounmap(hw->lmt_base); otx2_detach_resources(&vf->mbox); err_disable_mbox_intr: otx2vf_disable_mbox_intr(vf); @@ -628,8 +645,11 @@ static void otx2vf_remove(struct pci_dev *pdev) cancel_work_sync(&vf->reset_task); unregister_netdev(netdev); otx2vf_disable_mbox_intr(vf); - otx2_detach_resources(&vf->mbox); + + if (vf->hw.lmt_base) + iounmap(vf->hw.lmt_base); + otx2vf_vfaf_mbox_destroy(vf); pci_free_irq_vectors(vf->pdev); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index ad45d20f9d44..9d623e38d783 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -149,14 +149,14 @@ config MLX5_IPSEC IPsec support for the Connect-X family. config MLX5_EN_IPSEC - bool "IPSec XFRM cryptography-offload accelaration" + bool "IPSec XFRM cryptography-offload acceleration" depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD depends on MLX5_FPGA_IPSEC || MLX5_IPSEC default n help - Build support for IPsec cryptography-offload accelaration in the NIC. + Build support for IPsec cryptography-offload acceleration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. 
@@ -166,7 +166,6 @@ config MLX5_FPGA_TLS depends on TLS=y || MLX5_CORE=m depends on MLX5_CORE_EN depends on MLX5_FPGA - depends on XPS select MLX5_EN_TLS default n help @@ -181,7 +180,6 @@ config MLX5_TLS depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m depends on MLX5_CORE_EN - depends on XPS select MLX5_ACCEL select MLX5_EN_TLS default n @@ -192,7 +190,7 @@ config MLX5_TLS config MLX5_EN_TLS bool help - Build support for TLS cryptography-offload accelaration in the NIC. + Build support for TLS cryptography-offload acceleration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8809dd4de57e..8cb2625472c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -40,6 +40,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ en_rep.o en/rep/bond.o en/mod_hdr.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/mapping.o lib/fs_chains.o en/tc_tun.o \ + esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h index 1177860a2ee4..f15718db5d0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_rep_tracepoint.h @@ -15,7 +15,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, const u8 *ha, bool neigh_connected), TP_ARGS(nhe, ha, neigh_connected), - TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) __array(u8, ha, 
ETH_ALEN) __array(u8, v4, 4) __array(u8, v6, 16) @@ -25,7 +25,7 @@ TRACE_EVENT(mlx5e_rep_neigh_update, struct in6_addr *pin6; __be32 *p32; - __assign_str(devname, mn->dev->name); + __assign_str(devname, nhe->neigh_dev->name); __entry->neigh_connected = neigh_connected; memcpy(__entry->ha, ha, ETH_ALEN); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h index d4e6cfaaade3..ac52ef37f38a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h @@ -77,7 +77,7 @@ TRACE_EVENT(mlx5e_stats_flower, TRACE_EVENT(mlx5e_tc_update_neigh_used_value, TP_PROTO(const struct mlx5e_neigh_hash_entry *nhe, bool neigh_used), TP_ARGS(nhe, neigh_used), - TP_STRUCT__entry(__string(devname, nhe->m_neigh.dev->name) + TP_STRUCT__entry(__string(devname, nhe->neigh_dev->name) __array(u8, v4, 4) __array(u8, v6, 16) __field(bool, neigh_used) @@ -86,7 +86,7 @@ TRACE_EVENT(mlx5e_tc_update_neigh_used_value, struct in6_addr *pin6; __be32 *p32; - __assign_str(devname, mn->dev->name); + __assign_str(devname, nhe->neigh_dev->name); __entry->neigh_used = neigh_used; p32 = (__be32 *)__entry->v4; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index 616ee585a985..be0ee03de721 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -129,10 +129,10 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) work); struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; struct neighbour *n = update_work->n; + bool neigh_connected, same_dev; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; - bool neigh_connected; u8 nud_state, dead; rtnl_lock(); @@ -146,12 +146,16 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) memcpy(ha, n->ha, 
ETH_ALEN); nud_state = n->nud_state; dead = n->dead; + same_dev = READ_ONCE(nhe->neigh_dev) == n->dev; read_unlock_bh(&n->lock); neigh_connected = (nud_state & NUD_VALID) && !dead; trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected); + if (!same_dev) + goto out; + list_for_each_entry(e, &nhe->encap_list, encap_list) { if (!mlx5e_encap_take(e)) continue; @@ -160,6 +164,7 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } +out: rtnl_unlock(); mlx5e_release_neigh_update_work(update_work); } @@ -175,7 +180,6 @@ static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv if (WARN_ON(!update_work)) return NULL; - m_neigh.dev = n->dev; m_neigh.family = n->ops->family; memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); @@ -246,7 +250,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, rcu_read_lock(); list_for_each_entry_rcu(nhe, &neigh_update->neigh_list, neigh_list) { - if (p->dev == nhe->m_neigh.dev) { + if (p->dev == READ_ONCE(nhe->neigh_dev)) { found = true; break; } @@ -369,7 +373,8 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, } int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, struct mlx5e_neigh_hash_entry **nhe) { int err; @@ -379,10 +384,11 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, return -ENOMEM; (*nhe)->priv = priv; - memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh)); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); + WRITE_ONCE((*nhe)->neigh_dev, neigh_dev); err = mlx5e_rep_neigh_entry_insert(priv, *nhe); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h index 
32b239189c95..6fe0ab970943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h @@ -16,7 +16,8 @@ struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev, struct mlx5e_neigh_hash_entry **nhe); void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 76177f7c5ec2..065126370acd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -26,7 +26,9 @@ struct mlx5e_rep_indr_block_priv { }; int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; @@ -39,9 +41,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, return err; mutex_lock(&rpriv->neigh_update.encap_lock); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + nhe = mlx5e_rep_neigh_entry_lookup(priv, m_neigh); if (!nhe) { - err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + err = mlx5e_rep_neigh_entry_create(priv, m_neigh, neigh_dev, &nhe); if (err) { mutex_unlock(&rpriv->neigh_update.encap_lock); mlx5_tun_entropy_refcount_dec(tun_entropy, @@ -122,7 +124,7 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, } unlock: mutex_unlock(&esw->offloads.encap_tbl_lock); - mlx5e_put_encap_flow_list(priv, &flow_list); + mlx5e_put_flow_list(priv, &flow_list); } static int @@ -651,7 +653,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; - 
zone_restore_id = reg_c1 & ZONE_RESTORE_MAX; + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &uplink_rpriv->uplink_priv; @@ -660,7 +662,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, return false; } - tunnel_id = reg_c1 >> REG_MAPPING_SHIFT(TUNNEL_TO_REG); + tunnel_id = reg_c1 >> ESW_TUN_OFFSET; return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); #endif /* CONFIG_NET_TC_SKB_EXT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index fdf9702c2d7d..d0661578467b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -27,7 +27,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv, unsigned char ha[ETH_ALEN]); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e); + struct mlx5e_encap_entry *e, + struct mlx5e_neigh *m_neigh, + struct net_device *neigh_dev); void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 40aaa105b2fc..0b503ebe59ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -711,6 +711,8 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->outer_match_level = MLX5_MATCH_L4; attr->counter = entry->counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->in_mdev = priv->mdev; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); @@ -1761,7 +1763,6 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, goto err_set_registers; } - 
dealloc_mod_hdr_actions(mod_acts); pre_ct_attr->modify_hdr = mod_hdr; pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 6503b614337c..69e618d17071 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -73,7 +73,7 @@ struct mlx5_ct_attr { #define zone_restore_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,\ .moffset = 0,\ - .mlen = 1,\ + .mlen = (ESW_ZONE_ID_BITS / 8),\ .soffset = MLX5_BYTE_OFF(fte_match_param,\ misc_parameters_2.metadata_reg_c_1) + 3,\ } @@ -81,14 +81,12 @@ struct mlx5_ct_attr { #define nic_zone_restore_to_reg_ct {\ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,\ .moffset = 2,\ - .mlen = 1,\ + .mlen = (ESW_ZONE_ID_BITS / 8),\ } #define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen) #define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset) #define REG_MAPPING_SHIFT(reg) (REG_MAPPING_MOFFSET(reg) * 8) -#define ZONE_RESTORE_BITS (REG_MAPPING_MLEN(ZONE_RESTORE_TO_REG) * 8) -#define ZONE_RESTORE_MAX GENMASK(ZONE_RESTORE_BITS - 1, 0) #if IS_ENABLED(CONFIG_MLX5_TC_CT) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h new file mode 100644 index 000000000000..c223591ffc22 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_PRIV_H__ +#define __MLX5_EN_TC_PRIV_H__ + +#include "en_tc.h" + +#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) + +#define MLX5E_TC_MAX_SPLITS 1 + +enum { + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, + MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, + MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, + MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, +}; + +struct mlx5e_tc_flow_parse_attr { + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; + struct mlx5_flow_spec spec; + struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; +}; + +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. 
Get the contining struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ + struct list_head list; + int index; +}; + +struct encap_route_flow_item { + struct mlx5e_route_entry *r; /* attached route instance */ + int index; +}; + +struct mlx5e_tc_flow { + struct rhash_head node; + struct mlx5e_priv *priv; + u64 cookie; + unsigned long flags; + struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + + /* flows sharing same route entry */ + struct list_head decap_routes; + struct mlx5e_route_entry *decap_route; + struct encap_route_flow_item encap_routes[MLX5_MAX_FLOW_FWD_VPORTS]; + + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. 
+ */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ + struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g + * due to missing route) + */ + struct net_device *orig_dev; /* netdev adding flow first */ + int tmp_entry_index; + struct list_head tmp_list; /* temporary flow list used by neigh update */ + refcount_t refcnt; + struct rcu_head rcu_head; + struct completion init_done; + int tunnel_id; /* the mapped tunnel id of this flow */ + struct mlx5_flow_attr *attr; +}; + +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer); + +struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); + +static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); +} + +#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) +{ + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); +} + +#define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + /* Complete all memory stores before clearing bit. 
*/ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); +} + +#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +static inline bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) +{ + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; +} + +#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow); +struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow); +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); + +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); + +#endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 90930e54b6f2..f8075a604605 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -6,10 +6,32 @@ #include <net/geneve.h> #include <net/bareudp.h> #include "en/tc_tun.h" +#include "en/tc_priv.h" #include "en_tc.h" #include "rep/tc.h" #include "rep/neigh.h" +struct mlx5e_tc_tun_route_attr { + struct net_device *out_dev; + struct net_device *route_dev; + union { + struct flowi4 fl4; + struct flowi6 fl6; + } fl; + struct neighbour *n; + u8 ttl; +}; + +#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {} + +static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr) +{ + if (attr->n) + neigh_release(attr->n); + if (attr->route_dev) + 
dev_put(attr->route_dev); +} + struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { if (netif_is_vxlan(tunnel_dev)) @@ -79,12 +101,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) + struct mlx5e_tc_tun_route_attr *attr) { + struct net_device *route_dev; + struct net_device *out_dev; struct neighbour *n; struct rtable *rt; @@ -97,46 +117,50 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = mdev->priv.eswitch; uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - fl4->flowi4_oif = uplink_dev->ifindex; + attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; } - rt = ip_route_output_key(dev_net(mirred_dev), fl4); + rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4); if (IS_ERR(rt)) return PTR_ERR(rt); if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { - ip_rt_put(rt); - return -ENETUNREACH; + ret = -ENETUNREACH; + goto err_rt_release; } #else return -EOPNOTSUPP; #endif - ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); - if (ret < 0) { - ip_rt_put(rt); - return ret; - } - dev_hold(*route_dev); + ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev); + if (ret < 0) + goto err_rt_release; + dev_hold(route_dev); - if (!(*out_ttl)) - *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); + if (!attr->ttl) + attr->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr); if (!n) { - dev_put(*route_dev); - return -ENOMEM; + ret = -ENOMEM; + goto err_dev_release; } - *out_n = n; + ip_rt_put(rt); + attr->route_dev = route_dev; + attr->out_dev = out_dev; + attr->n = n; return 0; + +err_dev_release: + dev_put(route_dev); +err_rt_release: + 
ip_rt_put(rt); + return ret; } -static void mlx5e_route_lookup_ipv4_put(struct net_device *route_dev, - struct neighbour *n) +static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr) { - neigh_release(n); - dev_put(route_dev); + mlx5e_tc_tun_route_attr_cleanup(attr); } static const char *mlx5e_netdev_kind(struct net_device *dev) @@ -188,28 +212,26 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; - struct net_device *out_dev, *route_dev; - struct flowi4 fl4 = {}; - struct neighbour *n; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); int ipv4_encap_size; char *encap_header; - u8 nud_state, ttl; struct iphdr *ip; + u8 nud_state; int err; /* add the IP fields */ - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - ttl = tun_key->ttl; + attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; - err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &out_dev, &route_dev, - &fl4, &n, &ttl); + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); if (err) return err; ipv4_encap_size = - (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + (is_vlan_dev(attr.route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + e->tunnel->calc_hlen(e); @@ -226,40 +248,36 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, goto release_neigh; } - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - e->route_dev_ifindex = route_dev->ifindex; + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; /* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes it's validity state, we would find the relevant neigh * in the hash. */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); if (err) goto free_encap; - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); /* add ethernet header */ - ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, ETH_P_IP); /* add ip header */ ip->tos = tun_key->tos; ip->version = 0x4; ip->ihl = 0x5; - ip->ttl = ttl; - ip->daddr = fl4.daddr; - ip->saddr = fl4.saddr; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), @@ -271,7 +289,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, e->encap_header = encap_header; if 
(!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); + neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ @@ -287,8 +305,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, } e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - mlx5e_route_lookup_ipv4_put(route_dev, n); + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); return err; destroy_neigh_entry: @@ -296,55 +314,155 @@ destroy_neigh_entry: free_encap: kfree(encap_header); release_neigh: - mlx5e_route_lookup_ipv4_put(route_dev, n); + mlx5e_route_lookup_ipv4_put(&attr); + return err; +} + +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + TC_TUN_ROUTE_ATTR_INIT(attr); + int ipv4_encap_size; + char *encap_header; + struct iphdr *ip; + u8 nud_state; + int err; + + /* add the IP fields */ + attr.fl.fl4.flowi4_tos = tun_key->tos; + attr.fl.fl4.daddr = tun_key->u.ipv4.dst; + attr.fl.fl4.saddr = tun_key->u.ipv4.src; + attr.ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(attr.route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel->calc_hlen(e); + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + err = -EOPNOTSUPP; + goto release_neigh; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) { + err = -ENOMEM; + goto release_neigh; + } + + e->route_dev_ifindex = attr.route_dev->ifindex; + + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev); + read_unlock_bh(&attr.n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = attr.ttl; + ip->daddr = attr.fl.fl4.daddr; + ip->saddr = attr.fl.fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto free_encap; + + e->encap_size = ipv4_encap_size; + kfree(e->encap_header); + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(attr.n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. 
+ */ + goto release_neigh; + } + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + err = PTR_ERR(e->pkt_reformat); + goto free_encap; + } + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv4_put(&attr); + return err; + +free_encap: + kfree(encap_header); +release_neigh: + mlx5e_route_lookup_ipv4_put(&attr); return err; } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct net_device **out_dev, - struct net_device **route_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) + struct mlx5e_tc_tun_route_attr *attr) { + struct net_device *route_dev; + struct net_device *out_dev; struct dst_entry *dst; struct neighbour *n; - int ret; - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, fl6, + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); + if (!attr->ttl) + attr->ttl = ip6_dst_hoplimit(dst); - ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); - if (ret < 0) { - dst_release(dst); - return ret; - } + ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev); + if (ret < 0) + goto err_dst_release; - dev_hold(*route_dev); - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); + dev_hold(route_dev); + n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr); if (!n) { - dev_put(*route_dev); - return -ENOMEM; + ret = -ENOMEM; + goto err_dev_release; } - *out_n = n; + dst_release(dst); + attr->out_dev = out_dev; + attr->route_dev = route_dev; + attr->n = n; return 0; + +err_dev_release: + dev_put(route_dev); +err_dst_release: + dst_release(dst); + return ret; } -static void 
mlx5e_route_lookup_ipv6_put(struct net_device *route_dev, - struct neighbour *n) +static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr) { - neigh_release(n); - dev_put(route_dev); + mlx5e_tc_tun_route_attr_cleanup(attr); } int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, @@ -353,28 +471,25 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; - struct net_device *out_dev, *route_dev; - struct flowi6 fl6 = {}; + struct mlx5e_neigh m_neigh = {}; + TC_TUN_ROUTE_ATTR_INIT(attr); struct ipv6hdr *ip6h; - struct neighbour *n = NULL; int ipv6_encap_size; char *encap_header; - u8 nud_state, ttl; + u8 nud_state; int err; - ttl = tun_key->ttl; + attr.ttl = tun_key->ttl; + attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + attr.fl.fl6.daddr = tun_key->u.ipv6.dst; + attr.fl.fl6.saddr = tun_key->u.ipv6.src; - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &out_dev, &route_dev, - &fl6, &n, &ttl); + err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr); if (err) return err; ipv6_encap_size = - (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + (is_vlan_dev(attr.route_dev) ? 
VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + e->tunnel->calc_hlen(e); @@ -391,39 +506,35 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, goto release_neigh; } - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - e->route_dev_ifindex = route_dev->ifindex; + m_neigh.family = attr.n->ops->family; + memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len); + e->out_dev = attr.out_dev; + e->route_dev_ifindex = attr.route_dev->ifindex; /* It's importent to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the * neigh changes it's validity state, we would find the relevant neigh * in the hash. */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev); if (err) goto free_encap; - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); + read_lock_bh(&attr.n->lock); + nud_state = attr.n->nud_state; + ether_addr_copy(e->h_dest, attr.n->ha); + read_unlock_bh(&attr.n->lock); /* add ethernet header */ - ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e, ETH_P_IPV6); /* add ip header */ ip6_flow_hdr(ip6h, tun_key->tos, 0); /* the HW fills up ipv6 payload len */ - ip6h->hop_limit = ttl; - ip6h->daddr = fl6.daddr; - ip6h->saddr = fl6.saddr; + ip6h->hop_limit = attr.ttl; + ip6h->daddr = attr.fl.fl6.daddr; + ip6h->saddr = attr.fl.fl6.saddr; /* add tunneling protocol header */ err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), @@ -435,7 +546,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct 
mlx5e_priv *priv, e->encap_header = encap_header; if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); + neigh_event_send(attr.n, NULL); /* the encap entry will be made valid on neigh update event * and not used before that. */ @@ -452,8 +563,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, } e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - mlx5e_route_lookup_ipv6_put(route_dev, n); + mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev)); + mlx5e_route_lookup_ipv6_put(&attr); return err; destroy_neigh_entry: @@ -461,10 +572,160 @@ destroy_neigh_entry: free_encap: kfree(encap_header); release_neigh: - mlx5e_route_lookup_ipv6_put(route_dev, n); + mlx5e_route_lookup_ipv6_put(&attr); return err; }
+
+/* Rebuild the cached IPv6 encapsulation header of an existing encap entry.
+ *
+ * Performs a fresh route/neighbour lookup for the tunnel key stored in @e,
+ * generates a new Ethernet + IPv6 + tunnel header, and installs it as
+ * e->encap_header (freeing the previous buffer). If the neighbour is already
+ * in a NUD_VALID state, a packet reformat object is allocated from the new
+ * header and the entry is flagged MLX5_ENCAP_ENTRY_VALID; otherwise a
+ * neighbour resolution is triggered and the entry is left not-valid.
+ *
+ * @priv:       driver private data used for capability queries and allocation
+ * @mirred_dev: device handed to the IPv6 route lookup
+ * @e:          encap entry whose header is regenerated
+ *
+ * Returns 0 on success (including the "neighbour not yet valid" case),
+ * -EOPNOTSUPP if the header exceeds the device limit, -ENOMEM on allocation
+ * failure, or the error returned by the route lookup, tunnel header
+ * generation, or packet reformat allocation.
+ */
+int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+				    struct net_device *mirred_dev,
+				    struct mlx5e_encap_entry *e)
+{
+	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+	TC_TUN_ROUTE_ATTR_INIT(attr);
+	struct ipv6hdr *ip6h;
+	int ipv6_encap_size;
+	char *encap_header;
+	u8 nud_state;
+	int err;
+
+	attr.ttl = tun_key->ttl;
+
+	attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+	attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
+	attr.fl.fl6.saddr = tun_key->u.ipv6.src;
+
+	err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
+	if (err)
+		return err;
+
+	ipv6_encap_size =
+		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
+		sizeof(struct ipv6hdr) +
+		e->tunnel->calc_hlen(e);
+
+	if (max_encap_size < ipv6_encap_size) {
+		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
+			       ipv6_encap_size, max_encap_size);
+		err = -EOPNOTSUPP;
+		goto release_neigh;
+	}
+
+	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
+	if (!encap_header) {
+		err = -ENOMEM;
+		goto release_neigh;
+	}
+
+	e->route_dev_ifindex = attr.route_dev->ifindex;
+
+	/* snapshot neighbour state and MAC consistently under its lock */
+	read_lock_bh(&attr.n->lock);
+	nud_state = attr.n->nud_state;
+	ether_addr_copy(e->h_dest, attr.n->ha);
+	WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
+	read_unlock_bh(&attr.n->lock);
+
+	/* add ethernet header */
+	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
+						 ETH_P_IPV6);
+
+	/* add ip header */
+	ip6_flow_hdr(ip6h, tun_key->tos, 0);
+	/* the HW fills up ipv6 payload len */
+	ip6h->hop_limit = attr.ttl;
+	ip6h->daddr = attr.fl.fl6.daddr;
+	ip6h->saddr = attr.fl.fl6.saddr;
+
+	/* add tunneling protocol header */
+	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
+					 &ip6h->nexthdr, e);
+	if (err)
+		goto free_encap;
+
+	e->encap_size = ipv6_encap_size;
+	kfree(e->encap_header);
+	e->encap_header = encap_header;
+	/* The entry owns the buffer from here on. Clear the local pointer so
+	 * that the free_encap error path below cannot free memory that
+	 * e->encap_header still references.
+	 */
+	encap_header = NULL;
+
+	if (!(nud_state & NUD_VALID)) {
+		neigh_event_send(attr.n, NULL);
+		/* the encap entry will be made valid on neigh update event
+		 * and not used before that.
+		 */
+		goto release_neigh;
+	}
+
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     ipv6_encap_size, e->encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		err = PTR_ERR(e->pkt_reformat);
+		goto free_encap;
+	}
+
+	e->flags |= MLX5_ENCAP_ENTRY_VALID;
+	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
+	mlx5e_route_lookup_ipv6_put(&attr);
+	return err;
+
+free_encap:
+	/* NULL (a no-op) once ownership has moved to the encap entry */
+	kfree(encap_header);
+release_neigh:
+	mlx5e_route_lookup_ipv6_put(&attr);
+	return err;
+}
+#endif + +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *flow_attr) +{ + struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + TC_TUN_ROUTE_ATTR_INIT(attr); + u16 vport_num; + int err = 0; + + if (flow_attr->ip_version == 4) { + /* Addresses are swapped for decap */ + attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; + attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; + err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr); + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->ip_version == 6) { + /* Addresses are swapped for decap */ + attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; + attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; + err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr); + } #endif + else + return 0; + + if (err) + return err; + + if (attr.route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) + goto out; + + err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num); + if (err) + goto out; + + esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.vxlan_vni); + esw_attr->rx_tun_attr->decap_vport = vport_num; + +out: + if (flow_attr->ip_version == 4) + mlx5e_route_lookup_ipv4_put(&attr); +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (flow_attr->ip_version == 6) +
mlx5e_route_lookup_ipv6_put(&attr); +#endif + return err; +} bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) @@ -625,14 +886,6 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev, } } - /* Enforce DMAC when offloading incoming tunneled flows. - * Flow counters require a match on the DMAC. - */ - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, - dmac_47_16), priv->netdev->dev_addr); - /* let software handle IP fragments */ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 704359df6095..67de2bf36861 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -11,6 +11,8 @@ #include "en.h" #include "en_rep.h" +#ifdef CONFIG_MLX5_ESWITCH + enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, @@ -59,17 +61,30 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); +int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); #else static inline int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } +/* Stub used when CONFIG_INET/CONFIG_IPV6 is disabled: always fails with -EOPNOTSUPP. Must be static inline, like the create_header_ipv6 stub above, so that including this header from several translation units does not emit multiple external definitions. */ +static inline int mlx5e_tc_tun_update_header_ipv6(struct
mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } #endif +int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); @@ -86,4 +101,6 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, void *headers_c, void *headers_v); +#endif /* CONFIG_MLX5_ESWITCH */ + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c new file mode 100644 index 000000000000..6a116335bb21 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -0,0 +1,1653 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include <net/fib_notifier.h> +#include "tc_tun_encap.h" +#include "en_tc.h" +#include "tc_tun.h" +#include "rep/tc.h" +#include "diag/en_tc_tracepoint.h" + +enum { + MLX5E_ROUTE_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_route_key { + int ip_version; + union { + __be32 v4; + struct in6_addr v6; + } endpoint_ip; +}; + +struct mlx5e_route_entry { + struct mlx5e_route_key key; + struct list_head encap_entries; + struct list_head decap_flows; + u32 flags; + struct hlist_node hlist; + refcount_t refcnt; + int tunnel_dev_index; + struct rcu_head rcu; +}; + +struct mlx5e_tc_tun_encap { + struct mlx5e_priv *priv; + struct notifier_block fib_nb; + spinlock_t route_lock; /* protects route_tbl */ + unsigned long route_tbl_last_update; + DECLARE_HASHTABLE(route_tbl, 8); +}; + +static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r) +{ + return r->flags & MLX5E_ROUTE_ENTRY_VALID; +} + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec) +{ + struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; + struct mlx5_rx_tun_attr *tun_attr; + void *daddr, *saddr; + 
u8 ip_version; + + tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL); + if (!tun_attr) + return -ENOMEM; + + esw_attr->rx_tun_attr = tun_attr; + ip_version = mlx5e_tc_get_ip_version(spec, true); + + if (ip_version == 4) { + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + tun_attr->dst_ip.v4 = *(__be32 *)daddr; + tun_attr->src_ip.v4 = *(__be32 *)saddr; + if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4) + return 0; + } +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + else if (ip_version == 6) { + int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6); + struct in6_addr zerov6 = {}; + + daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6); + saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6); + memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size); + memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size); + if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) || + !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6))) + return 0; + } +#endif + /* Only set the flag if both src and dst ip addresses exist. They are + * required to establish routing. + */ + flow_flag_set(flow, TUN_RX); + return 0; +} + +static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr) +{ + bool all_flow_encaps_valid = true; + int i; + + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. 
+ */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + + return all_flow_encaps_valid; +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) + return; + + e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(e->pkt_reformat)) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", + PTR_ERR(e->pkt_reformat)); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; + + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + + /* Do not offload flows with unresolved neighbors */ + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + continue; + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_from_slow_path(esw, flow); + flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv 
*priv, + struct mlx5e_encap_entry *e, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; + struct mlx5_flow_spec *spec; + struct mlx5e_tc_flow *flow; + int err; + + list_for_each_entry(flow, flow_list, tmp_list) { + if (!mlx5e_is_offloaded_flow(flow)) + continue; + attr = flow->attr; + esw_attr = attr->esw_attr; + spec = &attr->parse_attr->spec; + + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; + } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); + flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + } + + /* we know that the encap is valid */ + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); +} + +static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, + struct list_head *flow_list, + int index) +{ + if (IS_ERR(mlx5e_flow_get(flow))) + return; + wait_for_completion(&flow->init_done); + + flow->tmp_entry_index = index; + list_add(&flow->tmp_list, flow_list); +} + +/* Takes reference to all flows attached to encap and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. 
+ */ +void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) +{ + struct encap_flow_item *efi; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + mlx5e_take_tmp_flow(flow, flow_list, efi->index); + } +} + +/* Takes reference to all flows attached to route and adds the flows to + * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. + */ +static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r, + struct list_head *flow_list) +{ + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, &r->decap_flows, decap_routes) + mlx5e_take_tmp_flow(flow, flow_list, 0); +} + +static struct mlx5e_encap_entry * +mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_encap_entry *next = NULL; + +retry: + rcu_read_lock(); + + /* find encap with non-zero reference counter value */ + for (next = e ? 
+ list_next_or_null_rcu(&nhe->encap_list, + &e->encap_list, + struct mlx5e_encap_entry, + encap_list) : + list_first_or_null_rcu(&nhe->encap_list, + struct mlx5e_encap_entry, + encap_list); + next; + next = list_next_or_null_rcu(&nhe->encap_list, + &next->encap_list, + struct mlx5e_encap_entry, + encap_list)) + if (mlx5e_encap_take(next)) + break; + + rcu_read_unlock(); + + /* release starting encap */ + if (e) + mlx5e_encap_put(netdev_priv(e->out_dev), e); + if (!next) + return next; + + /* wait for encap to be fully initialized */ + wait_for_completion(&next->res_ready); + /* continue searching if encap entry is not in valid state after completion */ + if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + e = next; + goto retry; + } + + return next; +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + struct mlx5e_encap_entry *e = NULL; + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + u64 lastuse; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + /* mlx5e_get_next_valid_encap() releases previous encap before returning + * next one. 
+ */ + while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { + struct mlx5e_priv *priv = netdev_priv(e->out_dev); + struct encap_flow_item *efi, *tmp; + struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + list_for_each_entry_safe(efi, tmp, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + list_add(&flow->tmp_list, &flow_list); + + if (mlx5e_is_offloaded_flow(flow)) { + counter = mlx5e_tc_get_counter(flow); + lastuse = mlx5_fc_query_lastuse(counter); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_put_flow_list(priv, &flow_list); + if (neigh_used) { + /* release current encap before breaking the loop */ + mlx5e_encap_put(priv, e); + break; + } + } + + trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev)); + if (!n) + return; + + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + WARN_ON(!list_empty(&e->flows)); + + if (e->compl_result > 0) { + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + } + + kfree(e->tun_info); + kfree(e->encap_header); + kfree_rcu(e, rcu); +} + +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + +void mlx5e_encap_put(struct mlx5e_priv *priv, 
struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index); + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) +{ + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (flow->attr->esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5e_detach_encap_route(priv, flow, out_index); + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); +} + +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + 
mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + +struct encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + +static int cmp_encap_info(struct encap_key *a, + struct encap_key *b) +{ + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; +} + +static int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + +static int hash_encap_info(struct encap_key *key) +{ + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tc_tunnel->tunnel_type); +} + +static int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} + +bool mlx5e_encap_take(struct mlx5e_encap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + +static struct mlx5e_encap_entry * +mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; +} + +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + 
mlx5e_decap_take(e)) + return e; + } + return NULL; +} + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info) +{ + size_t tun_size = sizeof(*tun_info) + tun_info->options_len; + + return kmemdup(tun_info, tun_size, GFP_KERNEL); +} + +static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < out_index; i++) { + if (flow->encaps[i].e != e) + continue; + NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); + netdev_err(priv->netdev, "can't duplicate encap action\n"); + return true; + } + + return false; +} + +static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) + goto out; + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + attr->dest_chain = 0; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE; + data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch, + vport_num); + err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts, + MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err >= 0) { + esw_attr->dests[out_index].src_port_rewrite_act_id = err; + err = 0; + } + +out: + if (route_dev) + dev_put(route_dev); + return err; +} + +static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts 
*mod_hdr_acts, + struct net_device *out_dev, + int route_dev_ifindex, + int out_index) +{ + int act_id = attr->dests[out_index].src_port_rewrite_act_id; + struct net_device *route_dev; + u16 vport_num; + int err = 0; + u32 data; + + route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex); + + if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops || + !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) { + err = -ENODEV; + goto out; + } + + err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num); + if (err) + goto out; + + data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch, + vport_num); + mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data); + +out: + if (route_dev) + dev_put(route_dev); + return err; +}
+
+/* Return the timestamp of the most recent route table update.
+ *
+ * Reads route_tbl_last_update of the uplink representor's tunnel encap
+ * context while holding route_lock. The value is returned as unsigned long,
+ * the same type as the field, so it is not truncated: callers keep it in an
+ * unsigned long and later compare it for equality against a copy taken
+ * directly from the field, which a narrower return type would make differ
+ * even when no update happened in between.
+ */
+static unsigned long mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_tc_tun_encap *encap;
+	unsigned long ret;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+	encap = uplink_priv->encap;
+
+	spin_lock_bh(&encap->route_lock);
+	ret = encap->route_tbl_last_update;
+	spin_unlock_bh(&encap->route_lock);
+	return ret;
+}
+
+static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev, + bool *encap_valid) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + const struct ip_tunnel_info *tun_info; + unsigned long tbl_time_before = 0; + struct
encap_key key; + struct mlx5e_encap_entry *e; + bool entry_created = false; + unsigned short family; + uintptr_t hash_key; + int err = 0; + + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + key.ip_tun_key = &tun_info->key; + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); + if (!key.tc_tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); + return -EOPNOTSUPP; + } + + hash_key = hash_encap_info(&key); + + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key); + + /* must verify if encap is valid or not */ + if (e) { + /* Check that entry was not already attached to this flow */ + if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { + err = -EOPNOTSUPP; + goto out_err; + } + + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result < 0) { + err = -EREMOTEIO; + goto out_err; + } + goto attach_flow; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready); + entry_created = true; + INIT_LIST_HEAD(&e->route_list); + + tun_info = mlx5e_dup_tun_info(tun_info); + if (!tun_info) { + err = -ENOMEM; + goto out_err_init; + } + e->tun_info = tun_info; + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err_init; + + INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + if (family == AF_INET) + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); + else if (family == AF_INET6) + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); + + /* Protect against concurrent neigh update. 
*/ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; + goto out_err; + } + e->compl_result = 1; + +attach_flow: + err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before, + out_index); + if (err) + goto out_err; + + flow->encaps[out_index].e = e; + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; + attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + *encap_valid = true; + } else { + *encap_valid = false; + } + mutex_unlock(&esw->offloads.encap_tbl_lock); + + return err; + +out_err: + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); + return err; + +out_err_init: + mutex_unlock(&esw->offloads.encap_tbl_lock); + kfree(tun_info); + kfree(e); + return err; +} + +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err = 0; + + parse_attr = flow->attr->parse_attr; + if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = parse_attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), 
GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, + sizeof(parse_attr->eth), + &parse_attr->eth, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + +static int cmp_route_info(struct mlx5e_route_key *a, + struct mlx5e_route_key *b) +{ + if (a->ip_version == 4 && b->ip_version == 4) + return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4, + sizeof(a->endpoint_ip.v4)); + else if (a->ip_version == 6 && b->ip_version == 6) + return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6, + sizeof(a->endpoint_ip.v6)); + return 1; +} + +static u32 hash_route_info(struct mlx5e_route_key *key) +{ + if (key->ip_version == 4) + return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0); + return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0); +} + +static void mlx5e_route_dealloc(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r) +{ + WARN_ON(!list_empty(&r->decap_flows)); + WARN_ON(!list_empty(&r->encap_entries)); + + kfree_rcu(r, rcu); +} + +static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if 
(!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock)) + return; + + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&r->refcnt)) + return; + hash_del_rcu(&r->hlist); + mlx5e_route_dealloc(priv, r); +} + +static struct mlx5e_route_entry * +mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key, + u32 hash_key) +{ + struct mlx5e_route_key r_key; + struct mlx5e_route_entry *r; + + hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) { + r_key = r->key; + if (!cmp_route_info(&r_key, key) && + refcount_inc_not_zero(&r->refcnt)) + return r; + } + return NULL; +} + +static struct mlx5e_route_entry * +mlx5e_route_get_create(struct mlx5e_priv *priv, + struct mlx5e_route_key *key, + int tunnel_dev_index, + unsigned long *route_tbl_change_time) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5e_tc_tun_encap *encap; + struct mlx5e_route_entry *r; + u32 hash_key; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + encap = uplink_priv->encap; + + hash_key = hash_route_info(key); + spin_lock_bh(&encap->route_lock); + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + if (r) { + if (!mlx5e_route_entry_valid(r)) { + mlx5e_route_put_locked(priv, r); + return ERR_PTR(-EINVAL); + } + return r; + } + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return ERR_PTR(-ENOMEM); + + r->key = *key; + r->flags |= MLX5E_ROUTE_ENTRY_VALID; + r->tunnel_dev_index = tunnel_dev_index; + refcount_set(&r->refcnt, 1); + INIT_LIST_HEAD(&r->decap_flows); + 
INIT_LIST_HEAD(&r->encap_entries); + + spin_lock_bh(&encap->route_lock); + *route_tbl_change_time = encap->route_tbl_last_update; + hash_add(encap->route_tbl, &r->hlist, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +static struct mlx5e_route_entry * +mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key) +{ + u32 hash_key = hash_route_info(key); + struct mlx5e_route_entry *r; + + spin_lock_bh(&encap->route_lock); + encap->route_tbl_last_update = jiffies; + r = mlx5e_route_get(encap, key, hash_key); + spin_unlock_bh(&encap->route_lock); + + return r; +} + +struct mlx5e_tc_fib_event_data { + struct work_struct work; + unsigned long event; + struct mlx5e_route_entry *r; + struct net_device *ul_dev; +}; + +static void mlx5e_tc_fib_event_work(struct work_struct *work); +static struct mlx5e_tc_fib_event_data * +mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags) +{ + struct mlx5e_tc_fib_event_data *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), flags); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work); + fib_work->event = event; + fib_work->ul_dev = ul_dev; + + return fib_work; +} + +static int +mlx5e_route_enqueue_update(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + unsigned long event) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_rep_priv *uplink_rpriv; + struct net_device *ul_dev; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + ul_dev = uplink_rpriv->netdev; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL); + if (!fib_work) + return -ENOMEM; + + dev_hold(ul_dev); + refcount_inc(&r->refcnt); + fib_work->r = r; + queue_work(priv->wq, &fib_work->work); + + return 0; +} + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + 
unsigned long tbl_time_before, tbl_time_after; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + mutex_lock(&esw->offloads.encap_tbl_lock); + if (!esw_attr->rx_tun_attr) + goto out; + + tbl_time_before = mlx5e_route_tbl_get_last_update(priv); + tbl_time_after = tbl_time_before; + err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr); + if (err || !esw_attr->rx_tun_attr->decap_vport) + goto out; + + key.ip_version = attr->ip_version; + if (key.ip_version == 4) + key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; + else + key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6; + + r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex, + &tbl_time_after); + if (IS_ERR(r)) { + err = PTR_ERR(r); + goto out; + } + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. 
+ */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + goto out; + } + } + + flow->decap_route = r; + list_add(&flow->decap_routes, &r->decap_flows); + mutex_unlock(&esw->offloads.encap_tbl_lock); + return 0; + +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + return err; +} + +static int mlx5e_attach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5e_encap_entry *e, + bool new_encap_entry, + unsigned long tbl_time_before, + int out_index) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + unsigned long tbl_time_after = tbl_time_before; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + const struct ip_tunnel_info *tun_info; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + unsigned short family; + int err = 0; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + tun_info = parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + + if (family == AF_INET) { + key.endpoint_ip.v4 = tun_info->key.u.ipv4.src; + key.ip_version = 4; + } else if (family == AF_INET6) { + key.endpoint_ip.v6 = tun_info->key.u.ipv6.src; + key.ip_version = 6; + } + + err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev, + e->route_dev_ifindex, out_index); + if (err || !(esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)) + return err; + + r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index], + &tbl_time_after); + if (IS_ERR(r)) + return PTR_ERR(r); + /* Routing changed concurrently. FIB event handler might have missed new + * entry, schedule update. 
+ */ + if (tbl_time_before != tbl_time_after) { + err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE); + if (err) { + mlx5e_route_put_locked(priv, r); + return err; + } + } + + flow->encap_routes[out_index].r = r; + if (new_encap_entry) + list_add(&e->route_list, &r->encap_entries); + flow->encap_routes[out_index].index = out_index; + return 0; +} + +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_route_entry *r = flow->decap_route; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->decap_routes); + flow->decap_route = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_detach_encap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + int out_index) +{ + struct mlx5e_route_entry *r = flow->encap_routes[out_index].r; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e, *tmp; + + if (!r) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + flow->encap_routes[out_index].r = NULL; + + if (!refcount_dec_and_test(&r->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; + } + list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list) + list_del_init(&e->route_list); + hash_del_rcu(&r->hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_route_dealloc(priv, r); +} + +static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + + if 
(!mlx5e_is_offloaded_flow(flow)) + continue; + esw_attr = attr->esw_attr; + + if (flow_flag_test(flow, SLOW)) + mlx5e_tc_unoffload_from_slow_path(esw, flow); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); + mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); + attr->modify_hdr = NULL; + + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + } + + e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; + } +} + +static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, + struct net_device *tunnel_dev, + struct mlx5e_encap_entry *e, + struct list_head *encap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + int err; + + err = ip_tunnel_info_af(e->tun_info) == AF_INET ? + mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) : + mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err); + e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE; + + list_for_each_entry(flow, encap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + + if (flow_flag_test(flow, FAILED)) + continue; + + esw_attr = attr->esw_attr; + parse_attr = attr->parse_attr; + spec = &parse_attr->spec; + + err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts, + e->out_dev, e->route_dev_ifindex, + flow->tmp_entry_index); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err); + continue; + } + + err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", + 
err); + continue; + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat; + esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + if (!mlx5e_tc_flow_all_encaps_valid(esw_attr)) + goto offload_to_slow_path; + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } else { +offload_to_slow_path: + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); + /* mark the flow's encap dest as non-valid */ + esw_attr->dests[flow->tmp_entry_index].flags &= + ~MLX5_ESW_DEST_ENCAP_VALID; + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + } else { + flow->rule[0] = rule; + } + } + flow_flag_set(flow, OFFLOADED); + } +} + +static int mlx5e_update_route_encaps(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + struct mlx5e_encap_entry *e; + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + list_for_each_entry(e, &r->encap_entries, route_list) { + LIST_HEAD(encap_flows); + + mlx5e_take_all_encap_flows(e, &encap_flows); + if (list_empty(&encap_flows)) + continue; + + if (mlx5e_route_entry_valid(r)) + mlx5e_invalidate_encap(priv, e, &encap_flows); + + if (!replace) { + list_splice(&encap_flows, flow_list); + continue; + } + + mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows); + list_splice(&encap_flows, flow_list); + } + + return 0; +} + +static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv, + struct list_head *flow_list) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + 
list_for_each_entry(flow, flow_list, tmp_list) + if (mlx5e_is_offloaded_flow(flow)) + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); +} + +static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, + struct list_head *decap_flows) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, decap_flows, tmp_list) { + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err; + + if (flow_flag_test(flow, FAILED)) + continue; + + parse_attr = attr->parse_attr; + spec = &parse_attr->spec; + err = mlx5e_tc_tun_route_lookup(priv, spec, attr); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", + err); + continue; + } + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n", + err); + } else { + flow->rule[0] = rule; + flow_flag_set(flow, OFFLOADED); + } + } +} + +static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv, + struct mlx5e_route_entry *r, + struct list_head *flow_list, + bool replace) +{ + struct net_device *tunnel_dev; + LIST_HEAD(decap_flows); + + tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index); + if (!tunnel_dev) + return -ENODEV; + + mlx5e_take_all_route_decap_flows(r, &decap_flows); + if (mlx5e_route_entry_valid(r)) + mlx5e_unoffload_flow_list(priv, &decap_flows); + if (replace) + mlx5e_reoffload_decap(priv, &decap_flows); + + list_splice(&decap_flows, flow_list); + + return 0; +} + +static void mlx5e_tc_fib_event_work(struct work_struct *work) +{ + struct mlx5e_tc_fib_event_data *event_data = + container_of(work, struct mlx5e_tc_fib_event_data, work); + struct net_device *ul_dev = event_data->ul_dev; + struct mlx5e_priv *priv = netdev_priv(ul_dev); + struct mlx5e_route_entry *r = event_data->r; + 
struct mlx5_eswitch *esw; + LIST_HEAD(flow_list); + bool replace; + int err; + + /* sync with concurrent neigh updates */ + rtnl_lock(); + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); + replace = event_data->event == FIB_EVENT_ENTRY_REPLACE; + + if (!mlx5e_route_entry_valid(r) && !replace) + goto out; + + err = mlx5e_update_route_encaps(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n", + err); + + err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace); + if (err) + mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n", + err); + + if (replace) + r->flags |= MLX5E_ROUTE_ENTRY_VALID; +out: + mutex_unlock(&esw->offloads.encap_tbl_lock); + rtnl_unlock(); + + mlx5e_put_flow_list(priv, &flow_list); + mlx5e_route_put(priv, event_data->r); + dev_put(event_data->ul_dev); + kfree(event_data); +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib_entry_notifier_info, info); + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->dst_len != 32) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + key.endpoint_ip.v4 = htonl(fen_info->dst); + key.ip_version = 4; + + /* Can't fail after this point because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. 
+ */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static struct mlx5e_tc_fib_event_data * +mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv, + struct net_device *ul_dev, + struct mlx5e_tc_tun_encap *encap, + unsigned long event, + struct fib_notifier_info *info) +{ + struct fib6_entry_notifier_info *fen_info; + struct mlx5e_tc_fib_event_data *fib_work; + struct mlx5e_route_entry *r; + struct mlx5e_route_key key; + struct net_device *fib_dev; + + fen_info = container_of(info, struct fib6_entry_notifier_info, info); + fib_dev = fib6_info_nh_dev(fen_info->rt); + if (fib_dev->netdev_ops != &mlx5e_netdev_ops || + fen_info->rt->fib6_dst.plen != 128) + return NULL; + + fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC); + if (!fib_work) + return ERR_PTR(-ENOMEM); + + memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr, + sizeof(fen_info->rt->fib6_dst.addr)); + key.ip_version = 6; + + /* Can't fail after this point because releasing reference to r + * requires obtaining sleeping mutex which we can't do in atomic + * context. 
+ */ + r = mlx5e_route_lookup_for_update(encap, &key); + if (!r) + goto out; + fib_work->r = r; + dev_hold(ul_dev); + + return fib_work; + +out: + kfree(fib_work); + return NULL; +} + +static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct mlx5e_tc_fib_event_data *fib_work; + struct fib_notifier_info *info = ptr; + struct mlx5e_tc_tun_encap *encap; + struct net_device *ul_dev; + struct mlx5e_priv *priv; + + encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb); + priv = encap->priv; + ul_dev = priv->netdev; + priv = netdev_priv(ul_dev); + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + case FIB_EVENT_ENTRY_DEL: + if (info->family == AF_INET) + fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info); + else if (info->family == AF_INET6) + fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info); + else + return NOTIFY_DONE; + + if (!IS_ERR_OR_NULL(fib_work)) { + queue_work(priv->wq, &fib_work->work); + } else if (IS_ERR(fib_work)) { + NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); + mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", + PTR_ERR(fib_work)); + } + + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_DONE; +} + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_tun_encap *encap; + int err; + + encap = kvzalloc(sizeof(*encap), GFP_KERNEL); + if (!encap) + return ERR_PTR(-ENOMEM); + + encap->priv = priv; + encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event; + spin_lock_init(&encap->route_lock); + hash_init(encap->route_tbl); + err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb, + NULL, NULL); + if (err) { + kvfree(encap); + return ERR_PTR(err); + } + + return encap; +} + +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap) +{ + if (!encap) + return; + + unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb); + flush_workqueue(encap->priv->wq); /* flush fib 
event works */ + kvfree(encap); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h new file mode 100644 index 000000000000..3391504d9a08 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUN_ENCAP_H__ +#define __MLX5_EN_TC_TUN_ENCAP_H__ + +#include "tc_priv.h" + +void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index); + +int mlx5e_attach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, + struct net_device **encap_dev, + bool *encap_valid); +int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +int mlx5e_attach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); +void mlx5e_detach_decap_route(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + +struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info); + +int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec); + +struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv); +void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap); + +#endif /* __MLX5_EN_TC_TUN_ENCAP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4cfdba997f24..d3534b657b98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -223,7 +223,7 @@ static int blocking_event(struct notifier_block *nb, unsigned long event, void * err = mlx5e_handle_trap_event(priv, data); break; default: - 
netdev_warn(priv->netdev, "Sync event: Unknouwn event %ld\n", event); + netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event); err = -EINVAL; } return err; @@ -5647,7 +5647,7 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, /* sanity */ if (new_max_nch != priv->max_nch) { netdev_warn(priv->netdev, - "%s: Replacing profile with different max channles\n", + "%s: Replacing profile with different max channels\n", __func__); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 84eeaa33033f..a132fff7a980 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <generated/utsrelease.h> #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 988195ab1c54..d1696404cca9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -59,6 +59,8 @@ struct mlx5e_neigh_update_table { struct mlx5_tc_ct_priv; struct mlx5e_rep_bond; +struct mlx5e_tc_tun_encap; + struct mlx5_rep_uplink_priv { /* Filters DB - instantiated by the uplink representor and shared by * the uplink's VFs @@ -90,6 +92,9 @@ struct mlx5_rep_uplink_priv { /* support eswitch vports bonding */ struct mlx5e_rep_bond *bond; + + /* tc tunneling encapsulation private data */ + struct mlx5e_tc_tun_encap *encap; }; struct mlx5e_rep_priv { @@ -110,7 +115,6 @@ struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) } struct mlx5e_neigh { - struct net_device *dev; union { __be32 v4; struct in6_addr v6; @@ -122,6 +126,7 @@ struct mlx5e_neigh_hash_entry { struct rhash_head rhash_node; struct mlx5e_neigh m_neigh; struct mlx5e_priv *priv; + struct net_device *neigh_dev; /* Save the neigh hash entry in a 
list on the representor in * addition to the hash table. In order to iterate easily over the @@ -153,6 +158,7 @@ enum { /* set when the encap entry is successfully offloaded into HW */ MLX5_ENCAP_ENTRY_VALID = BIT(0), MLX5_REFORMAT_DECAP = BIT(1), + MLX5_ENCAP_ENTRY_NO_ROUTE = BIT(2), }; struct mlx5e_decap_key { @@ -175,12 +181,12 @@ struct mlx5e_encap_entry { struct mlx5e_neigh_hash_entry *nhe; /* neigh hash entry list of encaps sharing the same neigh */ struct list_head encap_list; - struct mlx5e_neigh m_neigh; /* a node of the eswitch encap hash table which keeping all the encap * entries */ struct hlist_node encap_hlist; struct list_head flows; + struct list_head route_list; struct mlx5_pkt_reformat *pkt_reformat; const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 280ea1e1e039..9f126054d371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -63,6 +63,8 @@ #include "en/mapping.h" #include "en/tc_ct.h" #include "en/mod_hdr.h" +#include "en/tc_priv.h" +#include "en/tc_tun_encap.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -71,90 +73,6 @@ #define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) -#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) - -enum { - MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, - MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, - MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, - MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, - MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, - MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, - MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, - MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, - MLX5E_TC_FLOW_FLAG_SLOW = 
MLX5E_TC_FLOW_BASE + 3, - MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, - MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, - MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, - MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, - MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, -}; - -#define MLX5E_TC_MAX_SPLITS 1 - -/* Helper struct for accessing a struct containing list_head array. - * Containing struct - * |- Helper array - * [0] Helper item 0 - * |- list_head item 0 - * |- index (0) - * [1] Helper item 1 - * |- list_head item 1 - * |- index (1) - * To access the containing struct from one of the list_head items: - * 1. Get the helper item from the list_head item using - * helper item = - * container_of(list_head item, helper struct type, list_head field) - * 2. Get the contining struct from the helper item and its index in the array: - * containing struct = - * container_of(helper item, containing struct type, helper field[index]) - */ -struct encap_flow_item { - struct mlx5e_encap_entry *e; /* attached encap instance */ - struct list_head list; - int index; -}; - -struct mlx5e_tc_flow { - struct rhash_head node; - struct mlx5e_priv *priv; - u64 cookie; - unsigned long flags; - struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; - - /* flows sharing the same reformat object - currently mpls decap */ - struct list_head l3_to_l2_reformat; - struct mlx5e_decap_entry *decap_reformat; - - /* Flow can be associated with multiple encap IDs. - * The number of encaps is bounded by the number of supported - * destinations. 
- */ - struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5e_tc_flow *peer_flow; - struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ - struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ - struct list_head hairpin; /* flows sharing the same hairpin */ - struct list_head peer; /* flows with peer flow */ - struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ - struct net_device *orig_dev; /* netdev adding flow first */ - int tmp_efi_index; - struct list_head tmp_list; /* temporary flow list used by neigh update */ - refcount_t refcnt; - struct rcu_head rcu_head; - struct completion init_done; - int tunnel_id; /* the mapped tunnel id of this flow */ - struct mlx5_flow_attr *attr; -}; - -struct mlx5e_tc_flow_parse_attr { - const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; - struct net_device *filter_dev; - struct mlx5_flow_spec spec; - struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; - int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; - struct ethhdr eth; -}; #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) @@ -165,10 +83,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { .moffset = 0, .mlen = 2, }, + [VPORT_TO_REG] = { + .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, + .moffset = 2, + .mlen = 2, + }, [TUNNEL_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, .moffset = 1, - .mlen = 3, + .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8), .soffset = MLX5_BYTE_OFF(fte_match_param, misc_parameters_2.metadata_reg_c_1), }, @@ -247,11 +170,11 @@ mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, } int -mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, - enum mlx5_flow_namespace_type ns, - enum mlx5e_tc_attr_to_reg type, - u32 data) +mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum 
mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) { int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; @@ -275,9 +198,10 @@ mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, MLX5_SET(set_action_in, modact, offset, moffset * 8); MLX5_SET(set_action_in, modact, length, mlen * 8); MLX5_SET(set_action_in, modact, data, data); + err = mod_hdr_acts->num_actions; mod_hdr_acts->num_actions++; - return 0; + return err; } static struct mlx5_tc_ct_priv * @@ -326,6 +250,41 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, mlx5e_del_offloaded_nic_rule(priv, rule, attr); } +int +mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data) +{ + int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data); + + return ret < 0 ? ret : 0; +} + +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data) +{ + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; + int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield; + int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; + char *modact; + + modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ); + + /* Firmware has 5bit length field and 0 means 32bits */ + if (mlen == 4) + mlen = 0; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, mfield); + MLX5_SET(set_action_in, modact, offset, moffset * 8); + MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, data, data); +} + struct mlx5e_hairpin { struct mlx5_hairpin *pair; @@ -363,15 +322,14 @@ struct mlx5e_hairpin_entry { static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); -static struct mlx5e_tc_flow 
*mlx5e_flow_get(struct mlx5e_tc_flow *flow) +struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) { if (!flow || !refcount_inc_not_zero(&flow->refcnt)) return ERR_PTR(-EINVAL); return flow; } -static void mlx5e_flow_put(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) +void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { if (refcount_dec_and_test(&flow->refcnt)) { mlx5e_tc_del_flow(priv, flow); @@ -379,48 +337,6 @@ static void mlx5e_flow_put(struct mlx5e_priv *priv, } } -static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) -{ - /* Complete all memory stores before setting bit. */ - smp_mb__before_atomic(); - set_bit(flag, &flow->flags); -} - -#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) - -static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, - unsigned long flag) -{ - /* test_and_set_bit() provides all necessary barriers */ - return test_and_set_bit(flag, &flow->flags); -} - -#define flow_flag_test_and_set(flow, flag) \ - __flow_flag_test_and_set(flow, \ - MLX5E_TC_FLOW_FLAG_##flag) - -static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) -{ - /* Complete all memory stores before clearing bit. */ - smp_mb__before_atomic(); - clear_bit(flag, &flow->flags); -} - -#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ - MLX5E_TC_FLOW_FLAG_##flag) - -static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) -{ - bool ret = test_bit(flag, &flow->flags); - - /* Read fields of flow structure only after checking flags. 
*/ - smp_mb__after_atomic(); - return ret; -} - -#define flow_flag_test(flow, flag) __flow_flag_test(flow, \ - MLX5E_TC_FLOW_FLAG_##flag) - bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, ESWITCH); @@ -431,7 +347,7 @@ static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, FT); } -static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) +bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, OFFLOADED); } @@ -1146,23 +1062,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, kfree(flow->attr); } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index); - -static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct net_device *mirred_dev, - int out_index, - struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid); -static int mlx5e_attach_decap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack); -static void mlx5e_detach_decap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); - -static struct mlx5_flow_handle * +struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -1197,10 +1097,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, return rule; } -static void -mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr) +void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { flow_flag_clear(flow, OFFLOADED); @@ -1219,7 +1118,7 @@ offload_rule_0: mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } -static struct mlx5_flow_handle * +struct mlx5_flow_handle * mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec) @@ -1245,9 +1144,8 
@@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, return rule; } -static void -mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, - struct mlx5e_tc_flow *flow) +void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow) { struct mlx5_flow_attr *slow_attr; @@ -1315,6 +1213,63 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) mutex_unlock(&uplink_priv->unready_flows_lock); } +static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); + +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) +{ + struct mlx5_core_dev *out_mdev, *route_mdev; + struct mlx5e_priv *out_priv, *route_priv; + + out_priv = netdev_priv(out_dev); + out_mdev = out_priv->mdev; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + if (out_mdev->coredev_type != MLX5_COREDEV_PF || + route_mdev->coredev_type != MLX5_COREDEV_VF) + return false; + + return same_hw_devs(out_priv, route_priv); +} + +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) +{ + struct mlx5e_priv *out_priv, *route_priv; + struct mlx5_core_dev *route_mdev; + struct mlx5_eswitch *esw; + u16 vhca_id; + int err; + + out_priv = netdev_priv(out_dev); + esw = out_priv->mdev->priv.eswitch; + route_priv = netdev_priv(route_dev); + route_mdev = route_priv->mdev; + + vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + return err; +} + +int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) +{ + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &parse_attr->mod_hdr_acts; + struct mlx5_modify_hdr *mod_hdr; + + mod_hdr = mlx5_modify_header_alloc(priv->mdev, + get_flow_name_space(flow), + mod_hdr_acts->num_actions, + mod_hdr_acts->actions); + if (IS_ERR(mod_hdr)) + return PTR_ERR(mod_hdr); + + WARN_ON(flow->attr->modify_hdr); + 
flow->attr->modify_hdr = mod_hdr; + + return 0; +} + static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, @@ -1324,11 +1279,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; + bool vf_tun = false, encap_valid = true; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - bool encap_valid = true; u32 max_prio, max_chain; int err = 0; int out_index; @@ -1342,20 +1297,28 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG_MOD(extack, "Requested chain is out of supported range"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } max_prio = mlx5_chains_get_prio_range(esw_chains(esw)); if (attr->prio > max_prio) { NL_SET_ERR_MSG_MOD(extack, "Requested priority is out of supported range"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; + } + + if (flow_flag_test(flow, TUN_RX)) { + err = mlx5e_attach_decap_route(priv, flow); + if (err) + goto err_out; } if (flow_flag_test(flow, L3_TO_L2_DECAP)) { err = mlx5e_attach_decap(priv, flow, extack); if (err) - return err; + goto err_out; } parse_attr = attr->parse_attr; @@ -1373,8 +1336,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - return err; + goto err_out; + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + vf_tun = true; out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; esw_attr->dests[out_index].rep = rpriv->rep; @@ -1383,20 +1349,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - return err; + goto err_out; if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && 
!(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) { - err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); - if (err) - return err; + if (vf_tun) { + err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow); + if (err) + goto err_out; + } else { + err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + if (err) + goto err_out; + } } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw_attr->counter_dev, true); - if (IS_ERR(counter)) - return PTR_ERR(counter); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_out; + } attr->counter = counter; } @@ -1410,12 +1383,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); - if (IS_ERR(flow->rule[0])) - return PTR_ERR(flow->rule[0]); - else - flow_flag_set(flow, OFFLOADED); + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_out; + } + flow_flag_set(flow, OFFLOADED); return 0; + +err_out: + flow_flag_set(flow, FAILED); + return err; } static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@ -1436,8 +1414,11 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; + bool vf_tun = false; int out_index; + esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); if (flow_flag_test(flow, NOT_READY)) @@ -1455,20 +1436,33 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { + if (flow->decap_route) + mlx5e_detach_decap_route(priv, flow); + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + 
vf_tun = true; + if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) { mlx5e_detach_encap(priv, flow, out_index); kfree(attr->parse_attr->tun_info[out_index]); } - kvfree(attr->parse_attr); + } mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts); + if (vf_tun && attr->modify_hdr) + mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); + else + mlx5e_detach_mod_hdr(priv, flow); + } + kvfree(attr->parse_attr); + kvfree(attr->esw_attr->rx_tun_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter); + mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); @@ -1476,141 +1470,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, kfree(flow->attr); } -void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct list_head *flow_list) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_flow_handle *rule; - struct mlx5_flow_attr *attr; - struct mlx5_flow_spec *spec; - struct mlx5e_tc_flow *flow; - int err; - - e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, - MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(e->pkt_reformat)) { - mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", - PTR_ERR(e->pkt_reformat)); - return; - } - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(priv); - - list_for_each_entry(flow, flow_list, tmp_list) { - bool all_flow_encaps_valid = true; - int i; - - if (!mlx5e_is_offloaded_flow(flow)) - continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = 
&attr->parse_attr->spec; - - esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat; - esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - /* Flow can be associated with multiple encap entries. - * Before offloading the flow verify that all of them have - * a valid neighbour. - */ - for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { - if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) - continue; - if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { - all_flow_encaps_valid = false; - break; - } - } - /* Do not offload flows with unresolved neighbors */ - if (!all_flow_encaps_valid) - continue; - /* update from slow path rule to encap rule */ - rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", - err); - continue; - } - - mlx5e_tc_unoffload_from_slow_path(esw, flow); - flow->rule[0] = rule; - /* was unset when slow path rule removed */ - flow_flag_set(flow, OFFLOADED); - } -} - -void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, - struct mlx5e_encap_entry *e, - struct list_head *flow_list) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_flow_handle *rule; - struct mlx5_flow_attr *attr; - struct mlx5_flow_spec *spec; - struct mlx5e_tc_flow *flow; - int err; - - list_for_each_entry(flow, flow_list, tmp_list) { - if (!mlx5e_is_offloaded_flow(flow)) - continue; - attr = flow->attr; - esw_attr = attr->esw_attr; - spec = &attr->parse_attr->spec; - - /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); - /* mark the flow's encap dest as non-valid */ - esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; - - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", - err); - continue; - } - - 
mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); - flow->rule[0] = rule; - /* was unset when fast path rule removed */ - flow_flag_set(flow, OFFLOADED); - } - - /* we know that the encap is valid */ - e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); -} - -static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { return flow->attr->counter; } -/* Takes reference to all flows attached to encap and adds the flows to - * flow_list using 'tmp_list' list_head in mlx5e_tc_flow. - */ -void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list) -{ - struct encap_flow_item *efi; - struct mlx5e_tc_flow *flow; - - list_for_each_entry(efi, &e->flows, list) { - flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (IS_ERR(mlx5e_flow_get(flow))) - continue; - wait_for_completion(&flow->init_done); - - flow->tmp_efi_index = efi->index; - list_add(&flow->tmp_list, flow_list); - } -} - /* Iterate over tmp_list of flows attached to flow_list head. */ -void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) { struct mlx5e_tc_flow *flow, *tmp; @@ -1618,222 +1484,6 @@ void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_l mlx5e_flow_put(priv, flow); } -static struct mlx5e_encap_entry * -mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, - struct mlx5e_encap_entry *e) -{ - struct mlx5e_encap_entry *next = NULL; - -retry: - rcu_read_lock(); - - /* find encap with non-zero reference counter value */ - for (next = e ? 
- list_next_or_null_rcu(&nhe->encap_list, - &e->encap_list, - struct mlx5e_encap_entry, - encap_list) : - list_first_or_null_rcu(&nhe->encap_list, - struct mlx5e_encap_entry, - encap_list); - next; - next = list_next_or_null_rcu(&nhe->encap_list, - &next->encap_list, - struct mlx5e_encap_entry, - encap_list)) - if (mlx5e_encap_take(next)) - break; - - rcu_read_unlock(); - - /* release starting encap */ - if (e) - mlx5e_encap_put(netdev_priv(e->out_dev), e); - if (!next) - return next; - - /* wait for encap to be fully initialized */ - wait_for_completion(&next->res_ready); - /* continue searching if encap entry is not in valid state after completion */ - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { - e = next; - goto retry; - } - - return next; -} - -void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) -{ - struct mlx5e_neigh *m_neigh = &nhe->m_neigh; - struct mlx5e_encap_entry *e = NULL; - struct mlx5e_tc_flow *flow; - struct mlx5_fc *counter; - struct neigh_table *tbl; - bool neigh_used = false; - struct neighbour *n; - u64 lastuse; - - if (m_neigh->family == AF_INET) - tbl = &arp_tbl; -#if IS_ENABLED(CONFIG_IPV6) - else if (m_neigh->family == AF_INET6) - tbl = ipv6_stub->nd_tbl; -#endif - else - return; - - /* mlx5e_get_next_valid_encap() releases previous encap before returning - * next one. 
- */ - while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { - struct mlx5e_priv *priv = netdev_priv(e->out_dev); - struct encap_flow_item *efi, *tmp; - struct mlx5_eswitch *esw; - LIST_HEAD(flow_list); - - esw = priv->mdev->priv.eswitch; - mutex_lock(&esw->offloads.encap_tbl_lock); - list_for_each_entry_safe(efi, tmp, &e->flows, list) { - flow = container_of(efi, struct mlx5e_tc_flow, - encaps[efi->index]); - if (IS_ERR(mlx5e_flow_get(flow))) - continue; - list_add(&flow->tmp_list, &flow_list); - - if (mlx5e_is_offloaded_flow(flow)) { - counter = mlx5e_tc_get_counter(flow); - lastuse = mlx5_fc_query_lastuse(counter); - if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { - neigh_used = true; - break; - } - } - } - mutex_unlock(&esw->offloads.encap_tbl_lock); - - mlx5e_put_encap_flow_list(priv, &flow_list); - if (neigh_used) { - /* release current encap before breaking the loop */ - mlx5e_encap_put(priv, e); - break; - } - } - - trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used); - - if (neigh_used) { - nhe->reported_lastuse = jiffies; - - /* find the relevant neigh according to the cached device and - * dst ip pair - */ - n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); - if (!n) - return; - - neigh_event_send(n, NULL); - neigh_release(n); - } -} - -static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) -{ - WARN_ON(!list_empty(&e->flows)); - - if (e->compl_result > 0) { - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); - - if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); - } - - kfree(e->tun_info); - kfree(e->encap_header); - kfree_rcu(e, rcu); -} - -static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, - struct mlx5e_decap_entry *d) -{ - WARN_ON(!list_empty(&d->flows)); - - if (!d->compl_result) - mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); - - kfree_rcu(d, rcu); -} - -void mlx5e_encap_put(struct mlx5e_priv *priv, struct 
mlx5e_encap_entry *e) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) - return; - hash_del_rcu(&e->encap_hlist); - mutex_unlock(&esw->offloads.encap_tbl_lock); - - mlx5e_encap_dealloc(priv, e); -} - -static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) - return; - hash_del_rcu(&d->hlist); - mutex_unlock(&esw->offloads.decap_tbl_lock); - - mlx5e_decap_dealloc(priv, d); -} - -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) -{ - struct mlx5e_encap_entry *e = flow->encaps[out_index].e; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - /* flow wasn't fully initialized */ - if (!e) - return; - - mutex_lock(&esw->offloads.encap_tbl_lock); - list_del(&flow->encaps[out_index].list); - flow->encaps[out_index].e = NULL; - if (!refcount_dec_and_test(&e->refcnt)) { - mutex_unlock(&esw->offloads.encap_tbl_lock); - return; - } - hash_del_rcu(&e->encap_hlist); - mutex_unlock(&esw->offloads.encap_tbl_lock); - - mlx5e_encap_dealloc(priv, e); -} - -static void mlx5e_detach_decap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_decap_entry *d = flow->decap_reformat; - - if (!d) - return; - - mutex_lock(&esw->offloads.decap_tbl_lock); - list_del(&flow->l3_to_l2_reformat); - flow->decap_reformat = NULL; - - if (!refcount_dec_and_test(&d->refcnt)) { - mutex_unlock(&esw->offloads.decap_tbl_lock); - return; - } - hash_del_rcu(&d->hlist); - mutex_unlock(&esw->offloads.decap_tbl_lock); - - mlx5e_decap_dealloc(priv, d); -} - static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; @@ -2091,6 +1741,29 @@ void 
mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, } } +u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) +{ + void *headers_v; + u16 ethertype; + u8 ip_version; + + if (outer) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + + ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version); + /* Return ip_version converted from ethertype anyway */ + if (!ip_version) { + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); + if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP) + ip_version = 4; + else if (ethertype == ETH_P_IPV6) + ip_version = 6; + } + return ip_version; +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2099,6 +1772,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, u8 *match_level, bool *match_inner) { + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = f->common.extack; bool needs_mapping, sets_mapping; @@ -2136,6 +1810,31 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, */ if (!netif_is_bareudp(filter_dev)) flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + err = mlx5e_tc_set_attr_rx_tun(flow, spec); + if (err) + return err; + } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + struct mlx5_flow_spec *tmp_spec; + + tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); + if (!tmp_spec) { + NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec"); + netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec"); + return -ENOMEM; + } + memcpy(tmp_spec, spec, sizeof(*tmp_spec)); + + err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); + if (err) { + kvfree(tmp_spec); + NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); + 
netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); + return err; + } + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + kvfree(tmp_spec); + if (err) + return err; } if (!needs_mapping && !sets_mapping) @@ -3584,35 +3283,6 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return 0; } -struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; -}; - -static inline int cmp_encap_info(struct encap_key *a, - struct encap_key *b) -{ - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; -} - -static inline int cmp_decap_info(struct mlx5e_decap_key *a, - struct mlx5e_decap_key *b) -{ - return memcmp(&a->key, &b->key, sizeof(b->key)); -} - -static inline int hash_encap_info(struct encap_key *key) -{ - return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tc_tunnel->tunnel_type); -} - -static inline int hash_decap_info(struct mlx5e_decap_key *key) -{ - return jhash(&key->key, sizeof(key->key), 0); -} - static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, struct net_device *peer_netdev) { @@ -3626,277 +3296,6 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, same_hw_devs(priv, peer_priv)); } -bool mlx5e_encap_take(struct mlx5e_encap_entry *e) -{ - return refcount_inc_not_zero(&e->refcnt); -} - -static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) -{ - return refcount_inc_not_zero(&e->refcnt); -} - -static struct mlx5e_encap_entry * -mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, - uintptr_t hash_key) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_encap_entry *e; - struct encap_key e_key; - - hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && - mlx5e_encap_take(e)) - return e; - } - - return NULL; -} - -static struct 
mlx5e_decap_entry * -mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, - uintptr_t hash_key) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_decap_key r_key; - struct mlx5e_decap_entry *e; - - hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, - hlist, hash_key) { - r_key = e->key; - if (!cmp_decap_info(&r_key, key) && - mlx5e_decap_take(e)) - return e; - } - return NULL; -} - -static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) -{ - size_t tun_size = sizeof(*tun_info) + tun_info->options_len; - - return kmemdup(tun_info, tun_size, GFP_KERNEL); -} - -static bool is_duplicated_encap_entry(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - int out_index, - struct mlx5e_encap_entry *e, - struct netlink_ext_ack *extack) -{ - int i; - - for (i = 0; i < out_index; i++) { - if (flow->encaps[i].e != e) - continue; - NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action"); - netdev_err(priv->netdev, "can't duplicate encap action\n"); - return true; - } - - return false; -} - -static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct net_device *mirred_dev, - int out_index, - struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; - const struct ip_tunnel_info *tun_info; - struct encap_key key; - struct mlx5e_encap_entry *e; - unsigned short family; - uintptr_t hash_key; - int err = 0; - - parse_attr = attr->parse_attr; - tun_info = parse_attr->tun_info[out_index]; - family = ip_tunnel_info_af(tun_info); - key.ip_tun_key = &tun_info->key; - key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); - if (!key.tc_tunnel) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel"); - return -EOPNOTSUPP; - } - - hash_key = hash_encap_info(&key); - - mutex_lock(&esw->offloads.encap_tbl_lock); - e = 
mlx5e_encap_get(priv, &key, hash_key); - - /* must verify if encap is valid or not */ - if (e) { - /* Check that entry was not already attached to this flow */ - if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) { - err = -EOPNOTSUPP; - goto out_err; - } - - mutex_unlock(&esw->offloads.encap_tbl_lock); - wait_for_completion(&e->res_ready); - - /* Protect against concurrent neigh update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); - if (e->compl_result < 0) { - err = -EREMOTEIO; - goto out_err; - } - goto attach_flow; - } - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) { - err = -ENOMEM; - goto out_err; - } - - refcount_set(&e->refcnt, 1); - init_completion(&e->res_ready); - - tun_info = dup_tun_info(tun_info); - if (!tun_info) { - err = -ENOMEM; - goto out_err_init; - } - e->tun_info = tun_info; - err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) - goto out_err_init; - - INIT_LIST_HEAD(&e->flows); - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); - mutex_unlock(&esw->offloads.encap_tbl_lock); - - if (family == AF_INET) - err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); - else if (family == AF_INET6) - err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - - /* Protect against concurrent neigh update. 
*/ - mutex_lock(&esw->offloads.encap_tbl_lock); - complete_all(&e->res_ready); - if (err) { - e->compl_result = err; - goto out_err; - } - e->compl_result = 1; - -attach_flow: - flow->encaps[out_index].e = e; - list_add(&flow->encaps[out_index].list, &e->flows); - flow->encaps[out_index].index = out_index; - *encap_dev = e->out_dev; - if (e->flags & MLX5_ENCAP_ENTRY_VALID) { - attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; - attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; - } else { - *encap_valid = false; - } - mutex_unlock(&esw->offloads.encap_tbl_lock); - - return err; - -out_err: - mutex_unlock(&esw->offloads.encap_tbl_lock); - if (e) - mlx5e_encap_put(priv, e); - return err; - -out_err_init: - mutex_unlock(&esw->offloads.encap_tbl_lock); - kfree(tun_info); - kfree(e); - return err; -} - -static int mlx5e_attach_decap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5e_decap_entry *d; - struct mlx5e_decap_key key; - uintptr_t hash_key; - int err = 0; - - parse_attr = flow->attr->parse_attr; - if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { - NL_SET_ERR_MSG_MOD(extack, - "encap header larger than max supported"); - return -EOPNOTSUPP; - } - - key.key = parse_attr->eth; - hash_key = hash_decap_info(&key); - mutex_lock(&esw->offloads.decap_tbl_lock); - d = mlx5e_decap_get(priv, &key, hash_key); - if (d) { - mutex_unlock(&esw->offloads.decap_tbl_lock); - wait_for_completion(&d->res_ready); - mutex_lock(&esw->offloads.decap_tbl_lock); - if (d->compl_result) { - err = -EREMOTEIO; - goto out_free; - } - goto found; - } - - d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) { - err = -ENOMEM; - goto out_err; - } - - d->key = key; - refcount_set(&d->refcnt, 1); - 
init_completion(&d->res_ready); - INIT_LIST_HEAD(&d->flows); - hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); - mutex_unlock(&esw->offloads.decap_tbl_lock); - - d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, - sizeof(parse_attr->eth), - &parse_attr->eth, - MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(d->pkt_reformat)) { - err = PTR_ERR(d->pkt_reformat); - d->compl_result = err; - } - mutex_lock(&esw->offloads.decap_tbl_lock); - complete_all(&d->res_ready); - if (err) - goto out_free; - -found: - flow->decap_reformat = d; - attr->decap_pkt_reformat = d->pkt_reformat; - list_add(&flow->l3_to_l2_reformat, &d->flows); - mutex_unlock(&esw->offloads.decap_tbl_lock); - return 0; - -out_free: - mutex_unlock(&esw->offloads.decap_tbl_lock); - mlx5e_decap_put(priv, d); - return err; - -out_err: - mutex_unlock(&esw->offloads.decap_tbl_lock); - return err; -} - static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -4249,7 +3648,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (encap) { parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info); + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(info); if (!parse_attr->tun_info[esw_attr->out_count]) return -ENOMEM; encap = false; @@ -4386,6 +3786,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } } + /* always set IP version for indirect table handling */ + attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { /* For prio tag mode, replace vlan pop with rewrite vlan prio @@ -4666,7 +4069,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return flow; err_free: - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); @@ 
-4811,6 +4213,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, return 0; err_free: + flow_flag_set(flow, FAILED); dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: @@ -5332,7 +4735,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) } uplink_priv->tunnel_mapping = mapping; - mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true); + /* 0xFFF is reserved for stack devices slow path table mark */ + mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; @@ -5345,8 +4749,16 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); - return err; + uplink_priv->encap = mlx5e_tc_tun_init(priv); + if (IS_ERR(uplink_priv->encap)) { + err = PTR_ERR(uplink_priv->encap); + goto err_register_fib_notifier; + } + return 0; + +err_register_fib_notifier: + rhashtable_destroy(tc_ht); err_ht_init: mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); err_enc_opts_mapping: @@ -5363,10 +4775,11 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) { struct mlx5_rep_uplink_priv *uplink_priv; - rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); - uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); + rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); + mlx5e_tc_tun_cleanup(uplink_priv->encap); + mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); @@ -5466,7 +4879,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, tc_skb_ext->chain = chain; zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & - ZONE_RESTORE_MAX; + ESW_ZONE_ID_MASK; if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, zone_restore_id)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 4a2ce241522e..89003ae7775a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -37,6 +37,8 @@ #include "en.h" #include "eswitch.h" #include "en/tc_ct.h" +#include "en/tc_tun.h" +#include "en_rep.h" #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff @@ -76,6 +78,7 @@ struct mlx5_flow_attr { struct mlx5_flow_table *dest_ft; u8 inner_match_level; u8 outer_match_level; + u8 ip_version; u32 flags; union { struct mlx5_esw_flow_attr esw_attr[0]; @@ -83,6 +86,19 @@ struct mlx5_flow_attr { }; }; +struct mlx5_rx_tun_attr { + u16 decap_vport; + union { + __be32 v4; + struct in6_addr v6; + } src_ip; /* Valid if decap_vport is not zero */ + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; /* Valid if decap_vport is not zero */ + u32 vni; +}; + #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 #define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0) @@ -158,7 +174,7 @@ bool mlx5e_encap_take(struct mlx5e_encap_entry *e); void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list); -void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); +void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); @@ -167,6 +183,7 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work); enum mlx5e_tc_attr_to_reg { CHAIN_TO_REG, + VPORT_TO_REG, TUNNEL_TO_REG, CTSTATE_TO_REG, ZONE_TO_REG, @@ -197,6 +214,11 @@ int mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev, enum mlx5e_tc_attr_to_reg type, u32 data); +void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5e_tc_attr_to_reg type, + int act_id, u32 data); + void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, u32 data, @@ -207,6 +229,16 @@ void 
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, u32 *data, u32 *mask); +int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, + enum mlx5_flow_namespace_type ns, + enum mlx5e_tc_attr_to_reg type, + u32 data); + +int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow); + int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, int namespace, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); @@ -242,6 +274,10 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr); +bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev); +int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, + u16 *vport); + #else /* CONFIG_MLX5_CLS_ACT */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} @@ -283,7 +319,7 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) reg_b = be32_to_cpu(cqe->ft_metadata); - if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS)) + if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ESW_ZONE_ID_BITS)) return false; chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c new file mode 100644 index 000000000000..6f6772bf61a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#include <linux/etherdevice.h> +#include <linux/idr.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" +#include "fs_core.h" +#include "esw/indir_table.h" +#include "lib/fs_chains.h" + +#define MLX5_ESW_INDIR_TABLE_SIZE 128 +#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) +#define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) + +struct mlx5_esw_indir_table_rule { + struct list_head list; + struct mlx5_flow_handle *handle; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + u32 vni; + struct mlx5_modify_hdr *mh; + refcount_t refcnt; +}; + +struct mlx5_esw_indir_table_entry { + struct hlist_node hlist; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *recirc_grp; + struct mlx5_flow_group *fwd_grp; + struct mlx5_flow_handle *fwd_rule; + struct list_head recirc_rules; + int recirc_cnt; + int fwd_ref; + + u16 vport; + u8 ip_version; +}; + +struct mlx5_esw_indir_table { + struct mutex lock; /* protects table */ + DECLARE_HASHTABLE(table, 8); +}; + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + struct mlx5_esw_indir_table *indir = kvzalloc(sizeof(*indir), GFP_KERNEL); + + if (!indir) + return ERR_PTR(-ENOMEM); + + mutex_init(&indir->lock); + hash_init(indir->table); + return indir; +} + +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ + mutex_destroy(&indir->lock); + kvfree(indir); +} + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + /* Use indirect table for all IP traffic from UL to VF with vport + * destination when source rewrite flag is set. 
+ */ + return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && + mlx5_eswitch_is_vf_vport(esw, vport_num) && + esw->dev == dest_mdev && + attr->ip_version && + attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE; +} + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0; +} + +static struct mlx5_esw_indir_table_rule * +mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_esw_indir_table_rule *rule; + + list_for_each_entry(rule, &e->recirc_rules, list) + if (rule->vni == attr->rx_tun_attr->vni && + !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip, + sizeof(attr->rx_tun_attr->dst_ip))) + goto found; + return NULL; + +found: + refcount_inc(&rule->refcnt); + return rule; +} + +static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_esw_indir_table_rule *rule; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + int err = 0; + u32 data; + + rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr); + if (rule) + return 0; + + if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) + return -EINVAL; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return -ENOMEM; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) { + err = -ENOMEM; + goto out; + } + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS | + MLX5_MATCH_MISC_PARAMETERS_2; + if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) { + 
MLX5_SET(fte_match_param, rule_spec->match_criteria, + outer_headers.ip_version, 0xf); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, + attr->ip_version); + } else if (attr->ip_version) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype, + (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6)); + } else { + err = -EOPNOTSUPP; + goto err_ethertype; + } + + if (attr->ip_version == 4) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + MLX5_SET(fte_match_param, rule_spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4, + ntohl(esw_attr->rx_tun_attr->dst_ip.v4)); + } else if (attr->ip_version == 6) { + int len = sizeof(struct in6_addr); + + memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, len); + memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &esw_attr->rx_tun_attr->dst_ip.v6, len); + } + + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + misc_parameters.vxlan_vni); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni, + MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni)); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch, + MLX5_VPORT_UPLINK)); + + /* Modify flow source to recirculate packet */ + data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + VPORT_TO_REG, data); + if (err) + 
goto err_mod_hdr_regc0; + + err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + TUNNEL_TO_REG, ESW_TUN_SLOW_TABLE_GOTO_VPORT); + if (err) + goto err_mod_hdr_regc1; + + flow_act.modify_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, mod_acts.actions); + if (IS_ERR(flow_act.modify_hdr)) { + err = PTR_ERR(flow_act.modify_hdr); + goto err_mod_hdr_alloc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); + if (IS_ERR(dest.ft)) { + err = PTR_ERR(dest.ft); + goto err_table; + } + handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto err_handle; + } + + dealloc_mod_hdr_actions(&mod_acts); + rule->handle = handle; + rule->vni = esw_attr->rx_tun_attr->vni; + rule->mh = flow_act.modify_hdr; + memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, + sizeof(esw_attr->rx_tun_attr->dst_ip)); + refcount_set(&rule->refcnt, 1); + list_add(&rule->list, &e->recirc_rules); + e->recirc_cnt++; + goto out; + +err_handle: + mlx5_chains_put_table(chains, 0, 1, 0); +err_table: + mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); +err_mod_hdr_alloc: +err_mod_hdr_regc1: + dealloc_mod_hdr_actions(&mod_acts); +err_mod_hdr_regc0: +err_ethertype: + kfree(rule); +out: + kfree(rule_spec); + return err; +} + +static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_esw_indir_table_entry *e) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + struct mlx5_esw_indir_table_rule *rule; + + list_for_each_entry(rule, &e->recirc_rules, list) + if (rule->vni == esw_attr->rx_tun_attr->vni && + !memcmp(&rule->dst_ip, 
&esw_attr->rx_tun_attr->dst_ip, + sizeof(esw_attr->rx_tun_attr->dst_ip))) + goto found; + + return; + +found: + if (!refcount_dec_and_test(&rule->refcnt)) + return; + + mlx5_del_flow_rules(rule->handle); + mlx5_chains_put_table(chains, 0, 1, 0); + mlx5_modify_header_dealloc(esw->dev, rule->mh); + list_del(&rule->list); + kfree(rule); + e->recirc_cnt--; +} + +static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) + MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf); + else + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype); + + if (attr->ip_version == 4) { + MLX5_SET_TO_ONES(fte_match_param, match, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else if (attr->ip_version == 6) { + memset(MLX5_ADDR_OF(fte_match_param, match, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, sizeof(struct in6_addr)); + } else { + err = -EOPNOTSUPP; + goto out; + } + + MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni); + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(create_flow_group_in, in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX); + e->recirc_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->recirc_grp)) { + err = PTR_ERR(e->recirc_grp); + goto out; + } + + INIT_LIST_HEAD(&e->recirc_rules); + e->recirc_cnt = 0; 
+ +out: + kfree(in); + return err; +} + +static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, + struct mlx5_esw_indir_table_entry *e) +{ + int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + kfree(in); + return -ENOMEM; + } + + /* Hold one entry */ + MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); + e->fwd_grp = mlx5_create_flow_group(e->ft, in); + if (IS_ERR(e->fwd_grp)) { + err = PTR_ERR(e->fwd_grp); + goto err_out; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = e->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); + if (IS_ERR(e->fwd_rule)) { + mlx5_destroy_flow_group(e->fwd_grp); + err = PTR_ERR(e->fwd_rule); + } + +err_out: + kfree(spec); + kfree(in); + return err; +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, u16 vport, bool decap) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *root_ns; + struct mlx5_esw_indir_table_entry *e; + struct mlx5_flow_table *ft; + int err = 0; + + root_ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) + return ERR_PTR(-ENOENT); + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = MLX5_ESW_INDIR_TABLE_SIZE; + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(root_ns, &ft_attr); + 
if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto tbl_err; + } + e->ft = ft; + e->vport = vport; + e->ip_version = attr->ip_version; + e->fwd_ref = !decap; + + err = mlx5_create_indir_recirc_group(esw, attr, spec, e); + if (err) + goto recirc_grp_err; + + if (decap) { + err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + if (err) + goto recirc_rule_err; + } + + err = mlx5_create_indir_fwd_group(esw, e); + if (err) + goto fwd_grp_err; + + hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, + vport << 16 | attr->ip_version); + + return e; + +fwd_grp_err: + if (decap) + mlx5_esw_indir_table_rule_put(esw, attr, e); +recirc_rule_err: + mlx5_destroy_flow_group(e->recirc_grp); +recirc_grp_err: + mlx5_destroy_flow_table(e->ft); +tbl_err: + kfree(e); + return ERR_PTR(err); +} + +static struct mlx5_esw_indir_table_entry * +mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version) +{ + struct mlx5_esw_indir_table_entry *e; + u32 key = vport << 16 | ip_version; + + hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) + if (e->vport == vport && e->ip_version == ip_version) + return e; + + return NULL; +} + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + int err; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + if (e) { + if (!decap) { + e->fwd_ref++; + } else { + err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + if (err) + goto out_err; + } + } else { + e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap); + if (IS_ERR(e)) { + err = PTR_ERR(e); + esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); + goto out_err; + } + } + mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return e->ft; + +out_err: + 
mutex_unlock(&esw->fdb_table.offloads.indir->lock); + return ERR_PTR(err); +} + +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ + struct mlx5_esw_indir_table_entry *e; + + mutex_lock(&esw->fdb_table.offloads.indir->lock); + e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + if (!e) + goto out; + + if (!decap) + e->fwd_ref--; + else + mlx5_esw_indir_table_rule_put(esw, attr, e); + + if (e->fwd_ref || e->recirc_cnt) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_group(e->recirc_grp); + mlx5_del_flow_rules(e->fwd_rule); + mlx5_destroy_flow_group(e->fwd_grp); + mlx5_destroy_flow_table(e->ft); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.indir->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h new file mode 100644 index 000000000000..cb9eafd1b4ee --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_ESW_FT_H__ +#define __MLX5_ESW_FT_H__ + +#ifdef CONFIG_MLX5_CLS_ACT + +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void); +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir); + +struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap); +void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap); + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev); + +u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr); + +#else +/* indir API stubs */ +struct mlx5_esw_indir_table * +mlx5_esw_indir_table_init(void) +{ + return NULL; +} + +void +mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) +{ +} + +static inline struct mlx5_flow_table * +mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + u16 vport, bool decap) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport, bool decap) +{ +} + +bool +mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + u16 vport_num, + struct mlx5_core_dev *dest_mdev) +{ + return false; +} + +static inline u16 +mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) +{ + return 0; +} +#endif + +#endif /* __MLX5_ESW_FT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 820305b1664e..aba17835465b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1300,6 +1300,13 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, (!vport_num && mlx5_core_is_ecpf(esw->dev))) 
vport->info.trusted = true; + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + if (ret) + goto err_vhca_mapping; + } + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1307,6 +1314,11 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, done: mutex_unlock(&esw->state_lock); return ret; + +err_vhca_mapping: + esw_vport_cleanup(esw, vport); + mutex_unlock(&esw->state_lock); + return ret; } void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) @@ -1325,6 +1337,11 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + mlx5_esw_vport_vhca_id_clear(esw, vport_num); + /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. 
@@ -1815,6 +1832,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) mlx5e_mod_hdr_tbl_init(&esw->offloads.mod_hdr); atomic64_set(&esw->offloads.num_flows, 0); ida_init(&esw->offloads.vport_metadata_ida); + xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -1854,6 +1872,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup_reps(esw); mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); + WARN_ON(!xa_empty(&esw->offloads.vhca_map)); + xa_destroy(&esw->offloads.vhca_map); ida_destroy(&esw->offloads.vport_metadata_ida); mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); mutex_destroy(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 479d2ac2cd85..fdf5c8c05c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,6 +36,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <linux/atomic.h> +#include <linux/xarray.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@ -160,6 +161,8 @@ struct mlx5_vport { struct devlink_port *dl_port; }; +struct mlx5_esw_indir_table; + struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -176,9 +179,11 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_namespace *ns; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; @@ -190,6 +195,8 @@ struct mlx5_eswitch_fdb { struct mutex lock; } vports; + struct mlx5_esw_indir_table *indir; + } offloads; }; u32 flags; @@ 
-212,6 +219,7 @@ struct mlx5_esw_offload { struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ + struct xarray vhca_map; const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; atomic64_t num_flows; @@ -387,12 +395,14 @@ enum mlx5_flow_match_level { enum { MLX5_ESW_DEST_ENCAP = BIT(0), MLX5_ESW_DEST_ENCAP_VALID = BIT(1), + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE = BIT(2), }; enum { MLX5_ESW_ATTR_FLAG_VLAN_HANDLED = BIT(0), MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), + MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), }; struct mlx5_esw_flow_attr { @@ -413,7 +423,9 @@ struct mlx5_esw_flow_attr { struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_core_dev *mdev; struct mlx5_termtbl_handle *termtbl; + int src_port_rewrite_act_id; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5_rx_tun_attr *rx_tun_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; }; @@ -734,6 +746,10 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p u16 vport_num, u32 sfnum); void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); + /** * mlx5_esw_event_info - Indicates eswitch mode changed/changing. 
* diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7f09f2bbf7c1..94cb0217b4f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,7 +38,9 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "esw/indir_table.h" #include "esw/acl/ofld.h" +#include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -257,7 +259,9 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5_flow_attr *attr, + struct mlx5_eswitch *src_esw, + u16 vport) { void *misc2; void *misc; @@ -266,10 +270,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, * VHCA in dual-port RoCE mode, and matching on source vport may fail. 
*/ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + if (mlx5_esw_indir_table_decap_vport(attr)) + vport = mlx5_esw_indir_table_decap_vport(attr); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, - attr->in_rep->vport)); + mlx5_eswitch_get_vport_metadata_for_match(src_esw, + vport)); misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, @@ -278,12 +284,12 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; } else { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + MLX5_CAP_GEN(src_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -295,6 +301,299 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, } } +static int +esw_setup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec) +{ + struct mlx5_flow_table *ft; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + ft = mlx5_esw_indir_table_get(esw, attr, spec, + mlx5_esw_indir_table_decap_vport(attr), true); + return PTR_ERR_OR_ZERO(ft); +} + +static void +esw_cleanup_decap_indir(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + if (mlx5_esw_indir_table_decap_vport(attr)) + mlx5_esw_indir_table_put(esw, attr, + mlx5_esw_indir_table_decap_vport(attr), + true); +} + +static int +esw_setup_ft_dest(struct 
mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = attr->dest_ft; + + if (mlx5_esw_indir_table_decap_vport(attr)) + return esw_setup_decap_indir(esw, attr, spec); + return 0; +} + +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = mlx5_chains_get_tc_end_ft(chains); +} + +static int +esw_setup_chain_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, + u32 chain, u32 prio, u32 level, + int i) +{ + struct mlx5_flow_table *ft; + + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + ft = mlx5_chains_get_table(chains, chain, prio, level); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; + return 0; +} + +static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, + int from, int to) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int i; + + for (i = from; i < to; i++) + if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + mlx5_chains_put_table(chains, 0, 1, 0); + else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) + mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport, + false); +} + +static bool +esw_is_chain_src_port_rewrite(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr) +{ + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (esw_attr->dests[i].flags & 
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE) + return true; + return false; +} + +static int +esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_fs_chains *chains, + struct mlx5_flow_attr *attr, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + goto err_setup_chain; + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; + } + return 0; + +err_setup_chain: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); +} + +static bool +esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int i; + + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, + esw_attr->dests[i].mdev)) + return true; + return false; +} + +static int +esw_setup_indir_table(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + bool ignore_flow_lvl, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int j, err; + + if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) + return -EOPNOTSUPP; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { + if (ignore_flow_lvl) + flow_act->flags 
|= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec, + esw_attr->dests[j].rep->vport, false); + if (IS_ERR(dest[*i].ft)) { + err = PTR_ERR(dest[*i].ft); + goto err_indir_tbl_get; + } + } + + if (mlx5_esw_indir_table_decap_vport(attr)) { + err = esw_setup_decap_indir(esw, attr, spec); + if (err) + goto err_indir_tbl_get; + } + + return 0; + +err_indir_tbl_get: + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); + return err; +} + +static void esw_cleanup_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + + esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, esw_attr->out_count); + esw_cleanup_decap_indir(esw, attr); +} + +static void +esw_cleanup_chain_dest(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) +{ + mlx5_chains_put_table(chains, chain, prio, level); +} + +static void +esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int attr_idx, int dest_idx, bool pkt_reformat) +{ + dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } + dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[dest_idx].vport.pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; + } +} + +static int +esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, 
+ struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + int j; + + for (j = esw_attr->split_count; j < esw_attr->out_count; j++, i++) + esw_setup_vport_dest(dest, flow_act, esw, esw_attr, j, i, true); + return i; +} + +static int +esw_setup_dests(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr, + struct mlx5_flow_spec *spec, + int *i) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + int err = 0; + + if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && + MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw)) + attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; + + if (attr->dest_ft) { + esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + (*i)++; + } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, chains, *i); + (*i)++; + } else if (attr->dest_chain) { + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, + 1, 0, *i); + (*i)++; + } else if (esw_is_indir_table(esw, attr)) { + err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); + } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { + err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); + } else { + *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); + } + + return err; +} + +static void +esw_cleanup_dests(struct mlx5_eswitch *esw, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_fs_chains *chains = esw_chains(esw); + + if (attr->dest_ft) { + esw_cleanup_decap_indir(esw, attr); + } else if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)) { + if (attr->dest_chain) + esw_cleanup_chain_dest(chains, attr->dest_chain, 1, 0); + else if (esw_is_indir_table(esw, attr)) + esw_cleanup_indir_table(esw, attr); + else if 
(esw_is_chain_src_port_rewrite(esw, esw_attr)) + esw_cleanup_chain_src_port_rewrite(esw, attr); + } +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -308,7 +607,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr fwd_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; - int j, i = 0; + int i = 0; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); @@ -329,50 +628,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } } + mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - struct mlx5_flow_table *ft; - - if (attr->dest_ft) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = attr->dest_ft; - i++; - } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = mlx5_chains_get_tc_end_ft(chains); - i++; - } else if (attr->dest_chain) { - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - ft = mlx5_chains_get_table(chains, attr->dest_chain, - 1, 0); - if (IS_ERR(ft)) { - rule = ERR_CAST(ft); - goto err_create_goto_table; - } - - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = ft; - i++; - } else { - for (j = esw_attr->split_count; j < esw_attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = esw_attr->dests[j].rep->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[j].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - dest[i].vport.flags |= - MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (esw_attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act.pkt_reformat = - esw_attr->dests[j].pkt_reformat; - dest[i].vport.flags |= 
MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = - esw_attr->dests[j].pkt_reformat; - } - i++; - } + int err; + + err = esw_setup_dests(dest, &flow_act, esw, attr, spec, &i); + if (err) { + rule = ERR_PTR(err); + goto err_create_goto_table; } } @@ -407,15 +671,15 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, fdb = attr->ft; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_NO_IN_PORT)) - mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); } if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; } - mlx5_eswitch_set_rule_flow_source(esw, spec, esw_attr); - if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, &flow_act, dest, i); @@ -434,8 +698,7 @@ err_add_rule: else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: - if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) && attr->dest_chain) - mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); + esw_cleanup_dests(esw, attr); err_create_goto_table: return rule; } @@ -453,7 +716,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - int i; + int i, err = 0; fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0); if (IS_ERR(fast_fdb)) { @@ -472,22 +735,26 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < esw_attr->split_count; i++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = esw_attr->dests[i].rep->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[i].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { - 
dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; - dest[i].vport.pkt_reformat = esw_attr->dests[i].pkt_reformat; + if (esw_is_indir_table(esw, attr)) + err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i); + else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) + err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr, + &i); + else + esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false); + + if (err) { + rule = ERR_PTR(err); + goto err_chain_src_rewrite; } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb; i++; - mlx5_eswitch_set_rule_source_port(esw, spec, esw_attr); + mlx5_eswitch_set_rule_source_port(esw, spec, attr, + esw_attr->in_mdev->priv.eswitch, + esw_attr->in_rep->vport); if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -495,13 +762,16 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (IS_ERR(rule)) - goto add_err; + if (IS_ERR(rule)) { + i = esw_attr->split_count; + goto err_chain_src_rewrite; + } atomic64_inc(&esw->offloads.num_flows); return rule; -add_err: +err_chain_src_rewrite: + esw_put_dest_tables_loop(esw, attr, 0, i); esw_vport_tbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); @@ -542,13 +812,13 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, if (fwd_rule) { esw_vport_tbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); + esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) esw_vport_tbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); - if (attr->dest_chain) - mlx5_chains_put_table(chains, attr->dest_chain, 1, 0); + esw_cleanup_dests(esw, attr); } } @@ -810,6 +1080,81 @@ void 
mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules; + int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num; + + if (!num_vfs || !flows) + return; + + mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) + mlx5_del_flow_rules(flows[i++]); + + kvfree(flows); +} + +static int +mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) +{ + int num_vfs, vport_num, rule_idx = 0, err = 0; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle **flows; + struct mlx5_flow_spec *spec; + + num_vfs = esw->esw_funcs.num_vfs; + flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL); + if (!flows) + return -ENOMEM; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + err = -ENOMEM; + goto alloc_err; + } + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_1, + ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) { + MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); + dest.vport.num = vport_num; + + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add send to vport 
meta rule idx %d, err %ld\n", + rule_idx, PTR_ERR(flow_rule)); + goto rule_err; + } + flows[rule_idx++] = flow_rule; + } + + esw->fdb_table.offloads.send_to_vport_meta_rules = flows; + kvfree(spec); + return 0; + +rule_err: + while (--rule_idx >= 0) + mlx5_del_flow_rules(flows[rule_idx]); + kvfree(spec); +alloc_err: + kvfree(flows); + return err; +} + static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw) { return MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & @@ -1292,11 +1637,11 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; + int num_vfs, table_size, ix, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; u32 flags = 0, *flow_group_in; - int table_size, ix, err = 0; struct mlx5_flow_group *g; void *match_criteria; u8 *dmac; @@ -1322,7 +1667,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + - MLX5_ESW_MISS_FLOWS + esw->total_vports; + MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. 
@@ -1370,6 +1715,38 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; + } + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { /* create peer esw miss group */ memset(flow_group_in, 0, inlen); @@ -1437,6 +1814,11 @@ miss_err: if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: + mlx5_eswitch_del_send_to_vport_meta_rules(esw); +meta_rule_err: + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); +send_vport_meta_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_chains_destroy(esw, esw_chains(esw)); @@ -1458,7 +1840,10 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); 
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + mlx5_eswitch_del_send_to_vport_meta_rules(esw); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + if (esw->fdb_table.offloads.send_to_vport_meta_grp) + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_meta_grp); if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -2182,12 +2567,20 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { + struct mlx5_esw_indir_table *indir; int err; memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + indir = mlx5_esw_indir_table_init(); + if (IS_ERR(indir)) { + err = PTR_ERR(indir); + goto create_indir_err; + } + esw->fdb_table.offloads.indir = indir; + err = esw_create_uplink_offloads_acl_tables(esw); if (err) goto create_acl_err; @@ -2219,6 +2612,8 @@ create_restore_err: create_offloads_err: esw_destroy_uplink_offloads_acl_tables(esw); create_acl_err: + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); +create_indir_err: mutex_destroy(&esw->fdb_table.offloads.vports.lock); return err; } @@ -2230,6 +2625,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_restore_table(esw); esw_destroy_offloads_table(esw); esw_destroy_uplink_offloads_acl_tables(esw); + mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir); mutex_destroy(&esw->fdb_table.offloads.vports.lock); } @@ -2867,3 +3263,94 @@ void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num); } + +static int mlx5_esw_query_vport_vhca_id(struct 
mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + if (mlx5_esw_is_manager_vport(esw, vport_num) || + !MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return -EPERM; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + +int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *old_entry, *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) { + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", + vport_num, err); + return err; + } + + vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); + if (!vhca_map_entry) + return -ENOMEM; + + *vhca_map_entry = vport_num; + old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); + if (xa_is_err(old_entry)) { + kfree(vhca_map_entry); + return xa_err(old_entry); + } + kfree(old_entry); + return 0; +} + +void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +{ + u16 *vhca_map_entry, vhca_id; + int err; + + err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + if (err) + esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", + vport_num, err); + + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + kfree(vhca_map_entry); +} + +int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num) +{ + u16 *res = xa_load(&esw->offloads.vhca_map, vhca_id); + + if (!res) + return -ENOENT; + + *vport_num = *res; + return 0; +} + +u32 mlx5_eswitch_get_vport_metadata_for_set(struct 
mlx5_eswitch *esw, + u16 vport_num) +{ + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); + + if (WARN_ON_ONCE(IS_ERR(vport))) + return 0; + + return vport->metadata; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 11b89a3a2ff5..66ad599bd488 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -572,7 +572,7 @@ static void del_hw_fte(struct fs_node *node) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); - node->active = 0; + node->active = false; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3754ef98554f..efe403c7e354 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -270,5 +270,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); + void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index daf63a8115e0..c4bf555c25ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -37,6 +37,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); if (!mdev->iseg) { mlx5_core_warn(mdev, "remap error\n"); + err = -ENOMEM; goto remap_err; } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ba78e0660523..e05c5c0f3ae1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1164,3 +1164,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev); } EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out) +{ + u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, other_function, true); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index cf111e73f81e..9ce90841f92d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4943,6 +4943,25 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, } static void +mlxsw_sp_fib4_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) +{ + u32 *p_dst = (u32 *) &fen_info->dst; + struct fib_rt_info fri; + + fri.fi = fen_info->fi; + fri.tb_id = fen_info->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = fen_info->dst_len; + fri.tos = fen_info->tos; + fri.type = fen_info->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = true; + fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); +} + +static void mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { @@ -4963,6 +4982,7 @@ 
mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, fri.type = fib4_entry->type; fri.offload = should_offload; fri.trap = !should_offload; + fri.offload_failed = false; fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } @@ -4985,11 +5005,36 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fri.type = fib4_entry->type; fri.offload = false; fri.trap = false; + fri.offload_failed = false; fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } #if IS_ENABLED(CONFIG_IPV6) static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ + int i; + + /* In IPv6 a multipath route is represented using multiple routes, so + * we need to set the flags on all of them. + */ + for (i = 0; i < nrt6; i++) + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), rt_arr[i], + false, false, true); +} +#else +static void +mlxsw_sp_fib6_offload_failed_flag_set(struct mlxsw_sp *mlxsw_sp, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { @@ -5006,7 +5051,7 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, - should_offload, !should_offload); + should_offload, !should_offload, false); } #else static void @@ -5028,7 +5073,7 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, - false, false); + false, false, false); } #else static void @@ -7021,6 +7066,8 @@ static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); + 
mlxsw_sp_fib4_offload_failed_flag_set(mlxsw_sp, + &fib_event->fen_info); } fib_info_put(fib_event->fen_info.fi); break; @@ -7042,6 +7089,7 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_event *fib_event) { + struct mlxsw_sp_fib6_event *fib6_event = &fib_event->fib6_event; int err; mlxsw_sp_span_respin(mlxsw_sp); @@ -7053,6 +7101,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_event->rt_arr, + fib6_event->nrt6); } mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; @@ -7062,6 +7113,9 @@ static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_fib6_offload_failed_flag_set(mlxsw_sp, + fib6_event->rt_arr, + fib6_event->nrt6); } mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index f8b85ab8be5d..1654a6e22a7d 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -370,6 +370,60 @@ static void ocelot_vlan_init(struct ocelot *ocelot) } } +static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port) +{ + return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port); +} + +int ocelot_port_flush(struct ocelot *ocelot, int port) +{ + int err, val; + + /* Disable dequeuing from the egress queues */ + ocelot_rmw_rix(ocelot, QSYS_PORT_MODE_DEQUEUE_DIS, + QSYS_PORT_MODE_DEQUEUE_DIS, + QSYS_PORT_MODE, port); + + /* Disable flow control */ + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); + + /* Disable priority flow control */ + ocelot_fields_write(ocelot, port, + QSYS_SWITCH_PORT_MODE_TX_PFC_ENA, 0); + + /* Wait at least the time 
it takes to receive a frame of maximum length + * at the port. + * Worst-case delays for 10 kilobyte jumbo frames are: + * 8 ms on a 10M port + * 800 μs on a 100M port + * 80 μs on a 1G port + * 32 μs on a 2.5G port + */ + usleep_range(8000, 10000); + + /* Disable half duplex backpressure. */ + ocelot_rmw_rix(ocelot, 0, SYS_FRONT_PORT_MODE_HDX_MODE, + SYS_FRONT_PORT_MODE, port); + + /* Flush the queues associated with the port. */ + ocelot_rmw_gix(ocelot, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG_FLUSH_ENA, + REW_PORT_CFG, port); + + /* Enable dequeuing from the egress queues. */ + ocelot_rmw_rix(ocelot, 0, QSYS_PORT_MODE_DEQUEUE_DIS, QSYS_PORT_MODE, + port); + + /* Wait until flushing is complete. */ + err = read_poll_timeout(ocelot_read_eq_avail, val, !val, + 100, 2000000, false, ocelot, port); + + /* Clear flushing again. */ + ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port); + + return err; +} +EXPORT_SYMBOL(ocelot_port_flush); + void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev) { diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index 0acb45948418..ea4e83410fe4 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -71,6 +71,14 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) } EXPORT_SYMBOL(ocelot_port_writel); +void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) +{ + u32 cur = ocelot_port_readl(port, reg); + + ocelot_port_writel(port, (cur & (~mask)) | val, reg); +} +EXPORT_SYMBOL(ocelot_port_rmwl); + u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, u32 reg, u32 offset) { diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 3efc5899f656..2e62a2c4eb63 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -168,6 +168,12 @@ struct qede_dump_info { u32 
args[QEDE_DUMP_MAX_ARGS]; }; +struct qede_coalesce { + bool isvalid; + u16 rxc; + u16 txc; +}; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -194,6 +200,7 @@ struct qede_dev { ((edev)->dev_info.common.dev_type == QED_DEV_TYPE_AH) struct qede_fastpath *fp_array; + struct qede_coalesce *coal_entry; u8 req_num_tx; u8 fp_num_tx; u8 req_num_rx; @@ -581,6 +588,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, struct flow_cls_offload *f); void qede_forced_speed_maps_init(void); +int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal); +int qede_set_per_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index bedbb85a179a..1560ad3d9290 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -819,8 +819,7 @@ out: return rc; } -static int qede_set_coalesce(struct net_device *dev, - struct ethtool_coalesce *coal) +int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct qede_dev *edev = netdev_priv(dev); struct qede_fastpath *fp; @@ -855,6 +854,8 @@ static int qede_set_coalesce(struct net_device *dev, "Set RX coalesce error, rc = %d\n", rc); return rc; } + edev->coal_entry[i].rxc = rxc; + edev->coal_entry[i].isvalid = true; } if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { @@ -874,6 +875,8 @@ static int qede_set_coalesce(struct net_device *dev, "Set TX coalesce error, rc = %d\n", rc); return rc; } + edev->coal_entry[i].txc = txc; + edev->coal_entry[i].isvalid = true; } } @@ -2105,6 +2108,129 @@ err: return rc; } +int qede_set_per_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *coal) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qede_fastpath *fp; + u16 rxc, txc; + int rc = 0; + 
+ if (coal->rx_coalesce_usecs > QED_COALESCE_MAX || + coal->tx_coalesce_usecs > QED_COALESCE_MAX) { + DP_INFO(edev, + "Can't support requested %s coalesce value [max supported value %d]\n", + coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx" + : "tx", + QED_COALESCE_MAX); + return -EINVAL; + } + + rxc = (u16)coal->rx_coalesce_usecs; + txc = (u16)coal->tx_coalesce_usecs; + + __qede_lock(edev); + if (queue >= edev->num_queues) { + DP_INFO(edev, "Invalid queue\n"); + rc = -EINVAL; + goto out; + } + + if (edev->state != QEDE_STATE_OPEN) { + rc = -EINVAL; + goto out; + } + + fp = &edev->fp_array[queue]; + + if (edev->fp_array[queue].type & QEDE_FASTPATH_RX) { + rc = edev->ops->common->set_coalesce(edev->cdev, + rxc, 0, + fp->rxq->handle); + if (rc) { + DP_INFO(edev, + "Set RX coalesce error, rc = %d\n", rc); + goto out; + } + edev->coal_entry[queue].rxc = rxc; + edev->coal_entry[queue].isvalid = true; + } + + if (edev->fp_array[queue].type & QEDE_FASTPATH_TX) { + rc = edev->ops->common->set_coalesce(edev->cdev, + 0, txc, + fp->txq->handle); + if (rc) { + DP_INFO(edev, + "Set TX coalesce error, rc = %d\n", rc); + goto out; + } + edev->coal_entry[queue].txc = txc; + edev->coal_entry[queue].isvalid = true; + } +out: + __qede_unlock(edev); + + return rc; +} + +static int qede_get_per_coalesce(struct net_device *dev, + u32 queue, + struct ethtool_coalesce *coal) +{ + void *rx_handle = NULL, *tx_handle = NULL; + struct qede_dev *edev = netdev_priv(dev); + struct qede_fastpath *fp; + u16 rx_coal, tx_coal; + int rc = 0; + + rx_coal = QED_DEFAULT_RX_USECS; + tx_coal = QED_DEFAULT_TX_USECS; + + memset(coal, 0, sizeof(struct ethtool_coalesce)); + + __qede_lock(edev); + if (queue >= edev->num_queues) { + DP_INFO(edev, "Invalid queue\n"); + rc = -EINVAL; + goto out; + } + + if (edev->state != QEDE_STATE_OPEN) { + rc = -EINVAL; + goto out; + } + + fp = &edev->fp_array[queue]; + + if (fp->type & QEDE_FASTPATH_RX) + rx_handle = fp->rxq->handle; + + rc = 
edev->ops->get_coalesce(edev->cdev, &rx_coal, + rx_handle); + if (rc) { + DP_INFO(edev, "Read Rx coalesce error\n"); + goto out; + } + + fp = &edev->fp_array[queue]; + if (fp->type & QEDE_FASTPATH_TX) + tx_handle = fp->txq->handle; + + rc = edev->ops->get_coalesce(edev->cdev, &tx_coal, + tx_handle); + if (rc) + DP_INFO(edev, "Read Tx coalesce error\n"); + +out: + __qede_unlock(edev); + + coal->rx_coalesce_usecs = rx_coal; + coal->tx_coalesce_usecs = tx_coal; + + return rc; +} + static const struct ethtool_ops qede_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_link_ksettings = qede_get_link_ksettings, @@ -2148,6 +2274,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .set_fecparam = qede_set_fecparam, .get_tunable = qede_get_tunable, .set_tunable = qede_set_tunable, + .get_per_queue_coalesce = qede_get_per_coalesce, + .set_per_queue_coalesce = qede_set_per_coalesce, .flash_device = qede_flash_device, .get_dump_flag = qede_get_dump_flag, .get_dump_data = qede_get_dump_data, @@ -2177,6 +2305,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = { .set_rxfh = qede_set_rxfh, .get_channels = qede_get_channels, .set_channels = qede_set_channels, + .get_per_queue_coalesce = qede_get_per_coalesce, + .set_per_queue_coalesce = qede_set_per_coalesce, .get_tunable = qede_get_tunable, .set_tunable = qede_set_tunable, }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 70c8d3cd85c0..8c47a9d2a965 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1450,7 +1450,8 @@ int qede_poll(struct napi_struct *napi, int budget) rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) && qede_has_rx_work(fp->rxq)) ? 
qede_rx_int(fp, budget) : 0; - if (rx_work_done < budget) { + /* Handle case where we are called by netpoll with a budget of 0 */ + if (rx_work_done < budget || !budget) { if (!qede_poll_is_more_work(fp)) { napi_complete_done(napi, rx_work_done); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4bf94797aac5..4d952036ba82 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -904,6 +904,7 @@ static int qede_alloc_fp_array(struct qede_dev *edev) { u8 fp_combined, fp_rx = edev->fp_num_rx; struct qede_fastpath *fp; + void *mem; int i; edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev), @@ -913,6 +914,15 @@ static int qede_alloc_fp_array(struct qede_dev *edev) goto err; } + mem = krealloc(edev->coal_entry, QEDE_QUEUE_CNT(edev) * + sizeof(*edev->coal_entry), GFP_KERNEL); + if (!mem) { + DP_ERR(edev, "coalesce entry allocation failed\n"); + kfree(edev->coal_entry); + goto err; + } + edev->coal_entry = mem; + fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx; /* Allocate the FP elements for Rx queues followed by combined and then @@ -1320,8 +1330,10 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) * [e.g., QED register callbacks] won't break anything when * accessing the netdevice. 
*/ - if (mode != QEDE_REMOVE_RECOVERY) + if (mode != QEDE_REMOVE_RECOVERY) { + kfree(edev->coal_entry); free_netdev(ndev); + } dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } @@ -2328,8 +2340,9 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, bool is_locked) { struct qed_link_params link_params; + struct ethtool_coalesce coal = {}; u8 num_tc; - int rc; + int rc, i; DP_INFO(edev, "Starting qede load\n"); @@ -2390,6 +2403,18 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, edev->state = QEDE_STATE_OPEN; + coal.rx_coalesce_usecs = QED_DEFAULT_RX_USECS; + coal.tx_coalesce_usecs = QED_DEFAULT_TX_USECS; + + for_each_queue(i) { + if (edev->coal_entry[i].isvalid) { + coal.rx_coalesce_usecs = edev->coal_entry[i].rxc; + coal.tx_coalesce_usecs = edev->coal_entry[i].txc; + } + __qede_unlock(edev); + qede_set_per_coalesce(edev->ndev, i, &coal); + __qede_lock(edev); + } DP_INFO(edev, "Ending successfully qede load\n"); goto out; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 04231585ef79..9197da8e626a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2036,9 +2036,12 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) { 0x7c8, 0x348, RTL_GIGA_MAC_VER_09 }, { 0x7c8, 0x248, RTL_GIGA_MAC_VER_09 }, { 0x7c8, 0x340, RTL_GIGA_MAC_VER_16 }, - /* FIXME: where did these entries come from ? -- FR */ - { 0xfc8, 0x388, RTL_GIGA_MAC_VER_13 }, - { 0xfc8, 0x308, RTL_GIGA_MAC_VER_13 }, + /* FIXME: where did these entries come from ? -- FR + * Not even r8101 vendor driver knows these id's, + * so let's disable detection for now. -- HK + * { 0xfc8, 0x388, RTL_GIGA_MAC_VER_13 }, + * { 0xfc8, 0x308, RTL_GIGA_MAC_VER_13 }, + */ /* 8110 family. 
*/ { 0xfc8, 0x980, RTL_GIGA_MAC_VER_06 }, @@ -4584,10 +4587,10 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct net_device *dev = tp->dev; int work_done; - work_done = rtl_rx(dev, tp, budget); - rtl_tx(dev, tp, budget); + work_done = rtl_rx(dev, tp, budget); + if (work_done < budget && napi_complete_done(napi, work_done)) rtl_irq_enable(tp); @@ -4804,9 +4807,12 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp) #ifdef CONFIG_PM -static int rtl8169_net_resume(struct rtl8169_private *tp) +static int rtl8169_runtime_resume(struct device *dev) { + struct rtl8169_private *tp = dev_get_drvdata(dev); + rtl_rar_set(tp, tp->dev->dev_addr); + __rtl8169_set_wol(tp, tp->saved_wolopts); if (tp->TxDescArray) rtl8169_up(tp); @@ -4840,7 +4846,7 @@ static int __maybe_unused rtl8169_resume(struct device *device) if (tp->mac_version == RTL_GIGA_MAC_VER_37) rtl_init_rxcfg(tp); - return rtl8169_net_resume(tp); + return rtl8169_runtime_resume(device); } static int rtl8169_runtime_suspend(struct device *device) @@ -4860,15 +4866,6 @@ static int rtl8169_runtime_suspend(struct device *device) return 0; } -static int rtl8169_runtime_resume(struct device *device) -{ - struct rtl8169_private *tp = dev_get_drvdata(device); - - __rtl8169_set_wol(tp, tp->saved_wolopts); - - return rtl8169_net_resume(tp); -} - static int rtl8169_runtime_idle(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index ba0e4d2b256a..6c19fcc76c6f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -74,8 +74,6 @@ MODULE_DEVICE_TABLE(of, intel_eth_plat_match); static int intel_eth_plat_probe(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct stmmac_priv *priv = netdev_priv(ndev); struct plat_stmmacenet_data 
*plat_dat; struct stmmac_resources stmmac_res; const struct of_device_id *match; @@ -83,7 +81,6 @@ static int intel_eth_plat_probe(struct platform_device *pdev) unsigned long rate; int ret; - plat_dat = priv->plat; ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 8ed3b2c834a0..56985542e202 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -324,7 +324,12 @@ static int tc_setup_cbs(struct stmmac_priv *priv, priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_AVB; } else if (!qopt->enable) { - return stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_DCB); + ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, + MTL_QUEUE_DCB); + if (ret) + return ret; + + priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; } /* Port Transmit Rate and Speed Divider */ diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index abfc4c435d59..affcf92cd3aa 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -92,6 +92,7 @@ config TI_CPTS config TI_K3_AM65_CPSW_NUSS tristate "TI K3 AM654x/J721E CPSW Ethernet driver" depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER + select NET_DEVLINK select TI_DAVINCI_MDIO imply PHY_TI_GMII_SEL depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS @@ -105,6 +106,15 @@ config TI_K3_AM65_CPSW_NUSS To compile this driver as a module, choose M here: the module will be called ti-am65-cpsw-nuss. +config TI_K3_AM65_CPSW_SWITCHDEV + bool "TI K3 AM654x/J721E CPSW Switch mode support" + depends on TI_K3_AM65_CPSW_NUSS + depends on NET_SWITCHDEV + help + This enables switchdev support for TI K3 CPSWxG Ethernet + Switch. Enable this driver to support hardware switch support for AM65 + CPSW NUSS driver. 
+ config TI_K3_AM65_CPTS tristate "TI K3 AM65x CPTS" depends on ARCH_K3 && OF diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 6e779292545d..75f761efbea7 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -26,4 +26,5 @@ keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale. obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o k3-cppi-desc-pool.o am65-cpsw-qos.o +ti-am65-cpsw-nuss-$(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) += am65-cpsw-switchdev.o obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 1850743c04da..638d7b03be4b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -31,6 +31,7 @@ #include "cpsw_ale.h" #include "cpsw_sl.h" #include "am65-cpsw-nuss.h" +#include "am65-cpsw-switchdev.h" #include "k3-cppi-desc-pool.h" #include "am65-cpts.h" @@ -228,6 +229,9 @@ static int am65_cpsw_nuss_ndo_slave_add_vid(struct net_device *ndev, u32 port_mask, unreg_mcast = 0; int ret; + if (!common->is_emac_mode) + return 0; + if (!netif_running(ndev) || !vid) return 0; @@ -255,6 +259,9 @@ static int am65_cpsw_nuss_ndo_slave_kill_vid(struct net_device *ndev, struct am65_cpsw_port *port = am65_ndev_to_port(ndev); int ret; + if (!common->is_emac_mode) + return 0; + if (!netif_running(ndev) || !vid) return 0; @@ -277,6 +284,11 @@ static void am65_cpsw_slave_set_promisc(struct am65_cpsw_port *port, { struct am65_cpsw_common *common = port->common; + if (promisc && !common->is_emac_mode) { + dev_dbg(common->dev, "promisc mode requested in switch mode"); + return; + } + if (promisc) { /* Enable promiscuous mode */ cpsw_ale_control_set(common->ale, port->port_id, @@ -408,6 +420,11 @@ void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common *common) writel(val, 
host_p->port_base + AM65_CPSW_PORT_REG_PRI_CTL); } +static void am65_cpsw_init_host_port_switch(struct am65_cpsw_common *common); +static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common); +static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port); +static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port); + static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, netdev_features_t features) { @@ -454,9 +471,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, ALE_DEFAULT_THREAD_ID, 0); cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_DEFAULT_THREAD_ENABLE, 1); - if (AM65_CPSW_IS_CPSW2G(common)) - cpsw_ale_control_set(common->ale, HOST_PORT_NUM, - ALE_PORT_NOLEARN, 1); /* switch to vlan unaware mode */ cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, 1); cpsw_ale_control_set(common->ale, HOST_PORT_NUM, @@ -470,6 +484,11 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, port_mask, port_mask, port_mask & ~ALE_PORT_HOST); + if (common->is_emac_mode) + am65_cpsw_init_host_port_emac(common); + else + am65_cpsw_init_host_port_switch(common); + for (i = 0; i < common->rx_chns.descs_num; i++) { skb = __netdev_alloc_skb_ip_align(NULL, AM65_CPSW_MAX_PACKET_SIZE, @@ -598,7 +617,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); struct am65_cpsw_port *port = am65_ndev_to_port(ndev); - u32 port_mask; int ret, i; ret = pm_runtime_get_sync(common->dev); @@ -631,19 +649,10 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) am65_cpsw_port_set_sl_mac(port, ndev->dev_addr); - if (port->slave.mac_only) { - /* enable mac-only mode on port */ - cpsw_ale_control_set(common->ale, port->port_id, - ALE_PORT_MACONLY, 1); - cpsw_ale_control_set(common->ale, port->port_id, - ALE_PORT_NOLEARN, 1); - } - - port_mask = BIT(port->port_id) | ALE_PORT_HOST; - 
cpsw_ale_add_ucast(common->ale, ndev->dev_addr, - HOST_PORT_NUM, ALE_SECURE, 0); - cpsw_ale_add_mcast(common->ale, ndev->broadcast, - port_mask, 0, 0, ALE_MCAST_FWD_2); + if (common->is_emac_mode) + am65_cpsw_init_port_emac_ale(port); + else + am65_cpsw_init_port_switch_ale(port); /* mac_sl should be configured via phy-link interface */ am65_cpsw_sl_ctl_reset(port); @@ -803,12 +812,13 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, new_skb = netdev_alloc_skb_ip_align(ndev, AM65_CPSW_MAX_PACKET_SIZE); if (new_skb) { + ndev_priv = netdev_priv(ndev); + am65_cpsw_nuss_set_offload_fwd_mark(skb, ndev_priv->offload_fwd_mark); skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); am65_cpsw_nuss_rx_csum(skb, csum_info); napi_gro_receive(&common->napi_rx, skb); - ndev_priv = netdev_priv(ndev); stats = this_cpu_ptr(ndev_priv->stats); u64_stats_update_begin(&stats->syncp); @@ -1451,6 +1461,13 @@ static void am65_cpsw_nuss_ndo_get_stats(struct net_device *dev, stats->tx_dropped = dev->stats.tx_dropped; } +static struct devlink_port *am65_cpsw_ndo_get_devlink_port(struct net_device *ndev) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + + return &port->devlink_port; +} + static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_open = am65_cpsw_nuss_ndo_slave_open, .ndo_stop = am65_cpsw_nuss_ndo_slave_stop, @@ -1464,6 +1481,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = { .ndo_vlan_rx_kill_vid = am65_cpsw_nuss_ndo_slave_kill_vid, .ndo_do_ioctl = am65_cpsw_nuss_ndo_slave_ioctl, .ndo_setup_tc = am65_cpsw_qos_ndo_setup_tc, + .ndo_get_devlink_port = am65_cpsw_ndo_get_devlink_port, }; static void am65_cpsw_nuss_slave_disable_unused(struct am65_cpsw_port *port) @@ -2031,6 +2049,441 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) } } +static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *common) +{ + int set_val = 0; + int i; + + if (common->br_members == 
(GENMASK(common->port_num, 1) & ~common->disabled_ports_mask)) + set_val = 1; + + dev_dbg(common->dev, "set offload_fwd_mark %d\n", set_val); + + for (i = 1; i <= common->port_num; i++) { + struct am65_cpsw_port *port = am65_common_get_port(common, i); + struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + + priv->offload_fwd_mark = set_val; + } +} + +bool am65_cpsw_port_dev_check(const struct net_device *ndev) +{ + if (ndev->netdev_ops == &am65_cpsw_nuss_netdev_ops) { + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + + return !common->is_emac_mode; + } + + return false; +} + +static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_device *br_ndev) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + + if (!common->br_members) { + common->hw_bridge_dev = br_ndev; + } else { + /* This is adding the port to a second bridge, this is + * unsupported + */ + if (common->hw_bridge_dev != br_ndev) + return -EOPNOTSUPP; + } + + common->br_members |= BIT(priv->port->port_id); + + am65_cpsw_port_offload_fwd_mark_update(common); + + return NOTIFY_DONE; +} + +static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev) +{ + struct am65_cpsw_common *common = am65_ndev_to_common(ndev); + struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev); + + common->br_members &= ~BIT(priv->port->port_id); + + am65_cpsw_port_offload_fwd_mark_update(common); + + if (!common->br_members) + common->hw_bridge_dev = NULL; +} + +/* netdev notifier */ +static int am65_cpsw_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; + int ret = NOTIFY_DONE; + + if (!am65_cpsw_port_dev_check(ndev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGEUPPER: + info = ptr; + + if (netif_is_bridge_master(info->upper_dev)) { 
+ if (info->linking) + ret = am65_cpsw_netdevice_port_link(ndev, info->upper_dev); + else + am65_cpsw_netdevice_port_unlink(ndev); + } + break; + default: + return NOTIFY_DONE; + } + + return notifier_from_errno(ret); +} + +static int am65_cpsw_register_notifiers(struct am65_cpsw_common *cpsw) +{ + int ret = 0; + + if (AM65_CPSW_IS_CPSW2G(cpsw) || + !IS_REACHABLE(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) + return 0; + + cpsw->am65_cpsw_netdevice_nb.notifier_call = &am65_cpsw_netdevice_event; + ret = register_netdevice_notifier(&cpsw->am65_cpsw_netdevice_nb); + if (ret) { + dev_err(cpsw->dev, "can't register netdevice notifier\n"); + return ret; + } + + ret = am65_cpsw_switchdev_register_notifiers(cpsw); + if (ret) + unregister_netdevice_notifier(&cpsw->am65_cpsw_netdevice_nb); + + return ret; +} + +static void am65_cpsw_unregister_notifiers(struct am65_cpsw_common *cpsw) +{ + if (AM65_CPSW_IS_CPSW2G(cpsw) || + !IS_REACHABLE(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) + return; + + am65_cpsw_switchdev_unregister_notifiers(cpsw); + unregister_netdevice_notifier(&cpsw->am65_cpsw_netdevice_nb); +} + +static const struct devlink_ops am65_cpsw_devlink_ops = {}; + +static void am65_cpsw_init_stp_ale_entry(struct am65_cpsw_common *cpsw) +{ + cpsw_ale_add_mcast(cpsw->ale, eth_stp_addr, ALE_PORT_HOST, ALE_SUPER, 0, + ALE_MCAST_BLOCK_LEARN_FWD); +} + +static void am65_cpsw_init_host_port_switch(struct am65_cpsw_common *common) +{ + struct am65_cpsw_host *host = am65_common_get_host(common); + + writel(common->default_vlan, host->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + + am65_cpsw_init_stp_ale_entry(common); + + cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_P0_UNI_FLOOD, 1); + dev_dbg(common->dev, "Set P0_UNI_FLOOD\n"); + cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_PORT_NOLEARN, 0); +} + +static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common) +{ + struct am65_cpsw_host *host = am65_common_get_host(common); + + writel(0, host->port_base + 
AM65_CPSW_PORT_VLAN_REG_OFFSET); + + cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_P0_UNI_FLOOD, 0); + dev_dbg(common->dev, "unset P0_UNI_FLOOD\n"); + + /* learning make no sense in multi-mac mode */ + cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_PORT_NOLEARN, 1); +} + +static int am65_cpsw_dl_switch_mode_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct am65_cpsw_devlink *dl_priv = devlink_priv(dl); + struct am65_cpsw_common *common = dl_priv->common; + + dev_dbg(common->dev, "%s id:%u\n", __func__, id); + + if (id != AM65_CPSW_DL_PARAM_SWITCH_MODE) + return -EOPNOTSUPP; + + ctx->val.vbool = !common->is_emac_mode; + + return 0; +} + +static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port) +{ + struct am65_cpsw_slave_data *slave = &port->slave; + struct am65_cpsw_common *common = port->common; + u32 port_mask; + + writel(slave->port_vlan, port->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + + if (slave->mac_only) + /* enable mac-only mode on port */ + cpsw_ale_control_set(common->ale, port->port_id, + ALE_PORT_MACONLY, 1); + + cpsw_ale_control_set(common->ale, port->port_id, ALE_PORT_NOLEARN, 1); + + port_mask = BIT(port->port_id) | ALE_PORT_HOST; + + cpsw_ale_add_ucast(common->ale, port->ndev->dev_addr, + HOST_PORT_NUM, ALE_SECURE, slave->port_vlan); + cpsw_ale_add_mcast(common->ale, port->ndev->broadcast, + port_mask, ALE_VLAN, slave->port_vlan, ALE_MCAST_FWD_2); +} + +static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port) +{ + struct am65_cpsw_slave_data *slave = &port->slave; + struct am65_cpsw_common *cpsw = port->common; + u32 port_mask; + + cpsw_ale_control_set(cpsw->ale, port->port_id, + ALE_PORT_NOLEARN, 0); + + cpsw_ale_add_ucast(cpsw->ale, port->ndev->dev_addr, + HOST_PORT_NUM, ALE_SECURE | ALE_BLOCKED | ALE_VLAN, + slave->port_vlan); + + port_mask = BIT(port->port_id) | ALE_PORT_HOST; + + cpsw_ale_add_mcast(cpsw->ale, port->ndev->broadcast, + port_mask, ALE_VLAN, 
slave->port_vlan, + ALE_MCAST_FWD_2); + + writel(slave->port_vlan, port->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + + cpsw_ale_control_set(cpsw->ale, port->port_id, + ALE_PORT_MACONLY, 0); +} + +static int am65_cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct am65_cpsw_devlink *dl_priv = devlink_priv(dl); + struct am65_cpsw_common *cpsw = dl_priv->common; + bool switch_en = ctx->val.vbool; + bool if_running = false; + int i; + + dev_dbg(cpsw->dev, "%s id:%u\n", __func__, id); + + if (id != AM65_CPSW_DL_PARAM_SWITCH_MODE) + return -EOPNOTSUPP; + + if (switch_en == !cpsw->is_emac_mode) + return 0; + + if (!switch_en && cpsw->br_members) { + dev_err(cpsw->dev, "Remove ports from bridge before disabling switch mode\n"); + return -EINVAL; + } + + rtnl_lock(); + + cpsw->is_emac_mode = !switch_en; + + for (i = 0; i < cpsw->port_num; i++) { + struct net_device *sl_ndev = cpsw->ports[i].ndev; + + if (!sl_ndev || !netif_running(sl_ndev)) + continue; + + if_running = true; + } + + if (!if_running) { + /* all ndevs are down */ + for (i = 0; i < cpsw->port_num; i++) { + struct net_device *sl_ndev = cpsw->ports[i].ndev; + struct am65_cpsw_slave_data *slave; + + if (!sl_ndev) + continue; + + slave = am65_ndev_to_slave(sl_ndev); + if (switch_en) + slave->port_vlan = cpsw->default_vlan; + else + slave->port_vlan = 0; + } + + goto exit; + } + + cpsw_ale_control_set(cpsw->ale, 0, ALE_BYPASS, 1); + /* clean up ALE table */ + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_CLEAR, 1); + cpsw_ale_control_get(cpsw->ale, HOST_PORT_NUM, ALE_AGEOUT); + + if (switch_en) { + dev_info(cpsw->dev, "Enable switch mode\n"); + + am65_cpsw_init_host_port_switch(cpsw); + + for (i = 0; i < cpsw->port_num; i++) { + struct net_device *sl_ndev = cpsw->ports[i].ndev; + struct am65_cpsw_slave_data *slave; + struct am65_cpsw_port *port; + + if (!sl_ndev) + continue; + + port = am65_ndev_to_port(sl_ndev); + slave = am65_ndev_to_slave(sl_ndev); + 
slave->port_vlan = cpsw->default_vlan; + + if (netif_running(sl_ndev)) + am65_cpsw_init_port_switch_ale(port); + } + + } else { + dev_info(cpsw->dev, "Disable switch mode\n"); + + am65_cpsw_init_host_port_emac(cpsw); + + for (i = 0; i < cpsw->port_num; i++) { + struct net_device *sl_ndev = cpsw->ports[i].ndev; + struct am65_cpsw_port *port; + + if (!sl_ndev) + continue; + + port = am65_ndev_to_port(sl_ndev); + port->slave.port_vlan = 0; + if (netif_running(sl_ndev)) + am65_cpsw_init_port_emac_ale(port); + } + } + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_BYPASS, 0); +exit: + rtnl_unlock(); + + return 0; +} + +static const struct devlink_param am65_cpsw_devlink_params[] = { + DEVLINK_PARAM_DRIVER(AM65_CPSW_DL_PARAM_SWITCH_MODE, "switch_mode", + DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + am65_cpsw_dl_switch_mode_get, + am65_cpsw_dl_switch_mode_set, NULL), +}; + +static void am65_cpsw_unregister_devlink_ports(struct am65_cpsw_common *common) +{ + struct devlink_port *dl_port; + struct am65_cpsw_port *port; + int i; + + for (i = 1; i <= common->port_num; i++) { + port = am65_common_get_port(common, i); + dl_port = &port->devlink_port; + + if (dl_port->registered) + devlink_port_unregister(dl_port); + } +} + +static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) +{ + struct devlink_port_attrs attrs = {}; + struct am65_cpsw_devlink *dl_priv; + struct device *dev = common->dev; + struct devlink_port *dl_port; + struct am65_cpsw_port *port; + int ret = 0; + int i; + + common->devlink = + devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv)); + if (!common->devlink) + return -ENOMEM; + + dl_priv = devlink_priv(common->devlink); + dl_priv->common = common; + + ret = devlink_register(common->devlink, dev); + if (ret) { + dev_err(dev, "devlink reg fail ret:%d\n", ret); + goto dl_free; + } + + /* Provide devlink hook to switch mode when multiple external ports + * are present NUSS switchdev driver is enabled. 
+ */ + if (!AM65_CPSW_IS_CPSW2G(common) && + IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) { + ret = devlink_params_register(common->devlink, + am65_cpsw_devlink_params, + ARRAY_SIZE(am65_cpsw_devlink_params)); + if (ret) { + dev_err(dev, "devlink params reg fail ret:%d\n", ret); + goto dl_unreg; + } + devlink_params_publish(common->devlink); + } + + for (i = 1; i <= common->port_num; i++) { + port = am65_common_get_port(common, i); + dl_port = &port->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = port->port_id; + attrs.switch_id.id_len = sizeof(resource_size_t); + memcpy(attrs.switch_id.id, common->switch_id, attrs.switch_id.id_len); + devlink_port_attrs_set(dl_port, &attrs); + + ret = devlink_port_register(common->devlink, dl_port, port->port_id); + if (ret) { + dev_err(dev, "devlink_port reg fail for port %d, ret:%d\n", + port->port_id, ret); + goto dl_port_unreg; + } + devlink_port_type_eth_set(dl_port, port->ndev); + } + + return ret; + +dl_port_unreg: + am65_cpsw_unregister_devlink_ports(common); +dl_unreg: + devlink_unregister(common->devlink); +dl_free: + devlink_free(common->devlink); + + return ret; +} + +static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common) +{ + if (!AM65_CPSW_IS_CPSW2G(common) && + IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) { + devlink_params_unpublish(common->devlink); + devlink_params_unregister(common->devlink, am65_cpsw_devlink_params, + ARRAY_SIZE(am65_cpsw_devlink_params)); + } + + am65_cpsw_unregister_devlink_ports(common); + devlink_unregister(common->devlink); + devlink_free(common->devlink); +} + static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) { struct device *dev = common->dev; @@ -2064,14 +2517,24 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common) } } + ret = am65_cpsw_register_notifiers(common); + if (ret) + goto err_cleanup_ndev; + + ret = am65_cpsw_nuss_register_devlink(common); + if (ret) + goto 
clean_unregister_notifiers; /* can't auto unregister ndev using devm_add_action() due to * devres release sequence in DD core for DMA */ - return 0; + return 0; +clean_unregister_notifiers: + am65_cpsw_unregister_notifiers(common); err_cleanup_ndev: am65_cpsw_nuss_cleanup_ndev(common); + return ret; } @@ -2151,6 +2614,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) struct device_node *node; struct resource *res; struct clk *clk; + u64 id_temp; int ret, i; common = devm_kzalloc(dev, sizeof(struct am65_cpsw_common), GFP_KERNEL); @@ -2170,6 +2634,9 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (IS_ERR(common->ss_base)) return PTR_ERR(common->ss_base); common->cpsw_base = common->ss_base + AM65_CPSW_CPSW_NU_BASE; + /* Use device's physical base address as switch id */ + id_temp = cpu_to_be64(res->start); + memcpy(common->switch_id, &id_temp, sizeof(res->start)); node = of_get_child_by_name(dev->of_node, "ethernet-ports"); if (!node) @@ -2183,6 +2650,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) init_completion(&common->tdown_complete); common->tx_ch_num = 1; common->pf_p0_rx_ptype_rrobin = false; + common->default_vlan = 1; common->ports = devm_kcalloc(dev, common->port_num, sizeof(*common->ports), @@ -2262,6 +2730,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) dev_set_drvdata(dev, common); + common->is_emac_mode = true; + ret = am65_cpsw_nuss_init_ndevs(common); if (ret) goto err_of_clear; @@ -2295,6 +2765,9 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) return ret; } + am65_cpsw_unregister_devlink(common); + am65_cpsw_unregister_notifiers(common); + /* must unregister ndevs here because DD release_driver routine calls * dma_deconfigure(dev) before devres_release_all(dev) */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index d7f8a0f76fdc..5d93e346f05e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ 
b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -6,12 +6,14 @@ #ifndef AM65_CPSW_NUSS_H_ #define AM65_CPSW_NUSS_H_ +#include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/phy.h> #include <linux/platform_device.h> #include <linux/soc/ti/k3-ringacc.h> +#include <net/devlink.h> #include "am65-cpsw-qos.h" struct am65_cpts; @@ -22,6 +24,8 @@ struct am65_cpts; #define AM65_CPSW_MAX_RX_QUEUES 1 #define AM65_CPSW_MAX_RX_FLOWS 1 +#define AM65_CPSW_PORT_VLAN_REG_OFFSET 0x014 + struct am65_cpsw_slave_data { bool mac_only; struct cpsw_sl *mac_sl; @@ -32,6 +36,7 @@ struct am65_cpsw_slave_data { bool rx_pause; bool tx_pause; u8 mac_addr[ETH_ALEN]; + int port_vlan; }; struct am65_cpsw_port { @@ -47,6 +52,7 @@ struct am65_cpsw_port { bool tx_ts_enabled; bool rx_ts_enabled; struct am65_cpsw_qos qos; + struct devlink_port devlink_port; }; struct am65_cpsw_host { @@ -85,6 +91,15 @@ struct am65_cpsw_pdata { const char *ale_dev_id; }; +enum cpsw_devlink_param_id { + AM65_CPSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + AM65_CPSW_DL_PARAM_SWITCH_MODE, +}; + +struct am65_cpsw_devlink { + struct am65_cpsw_common *common; +}; + struct am65_cpsw_common { struct device *dev; struct device *mdio_dev; @@ -117,6 +132,14 @@ struct am65_cpsw_common { bool pf_p0_rx_ptype_rrobin; struct am65_cpts *cpts; int est_enabled; + + bool is_emac_mode; + u16 br_members; + int default_vlan; + struct devlink *devlink; + struct net_device *hw_bridge_dev; + struct notifier_block am65_cpsw_netdevice_nb; + unsigned char switch_id[MAX_PHYS_ITEM_ID_LEN]; }; struct am65_cpsw_ndev_stats { @@ -131,6 +154,7 @@ struct am65_cpsw_ndev_priv { u32 msg_enable; struct am65_cpsw_port *port; struct am65_cpsw_ndev_stats __percpu *stats; + bool offload_fwd_mark; }; #define am65_ndev_to_priv(ndev) \ @@ -158,4 +182,6 @@ void am65_cpsw_nuss_set_p0_ptype(struct am65_cpsw_common *common); void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common); 
int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx); +bool am65_cpsw_port_dev_check(const struct net_device *dev); + #endif /* AM65_CPSW_NUSS_H_ */ diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c new file mode 100644 index 000000000000..1067e7772dbf --- /dev/null +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Texas Instruments K3 AM65 Ethernet Switchdev Driver + * + * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com/ + * + */ + +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/netdevice.h> +#include <linux/workqueue.h> +#include <net/switchdev.h> + +#include "am65-cpsw-nuss.h" +#include "am65-cpsw-switchdev.h" +#include "cpsw_ale.h" + +struct am65_cpsw_switchdev_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct am65_cpsw_port *port; + unsigned long event; +}; + +static int am65_cpsw_port_stp_state_set(struct am65_cpsw_port *port, u8 state) +{ + struct am65_cpsw_common *cpsw = port->common; + u8 cpsw_state; + int ret = 0; + + switch (state) { + case BR_STATE_FORWARDING: + cpsw_state = ALE_PORT_STATE_FORWARD; + break; + case BR_STATE_LEARNING: + cpsw_state = ALE_PORT_STATE_LEARN; + break; + case BR_STATE_DISABLED: + cpsw_state = ALE_PORT_STATE_DISABLE; + break; + case BR_STATE_LISTENING: + case BR_STATE_BLOCKING: + cpsw_state = ALE_PORT_STATE_BLOCK; + break; + default: + return -EOPNOTSUPP; + } + + ret = cpsw_ale_control_set(cpsw->ale, port->port_id, + ALE_PORT_STATE, cpsw_state); + netdev_dbg(port->ndev, "ale state: %u\n", cpsw_state); + + return ret; +} + +static int am65_cpsw_port_attr_br_flags_set(struct am65_cpsw_port *port, + struct net_device *orig_dev, + unsigned long brport_flags) +{ + struct am65_cpsw_common *cpsw = port->common; + bool unreg_mcast_add = false; + + if (brport_flags & BR_MCAST_FLOOD) + 
unreg_mcast_add = true; + netdev_dbg(port->ndev, "BR_MCAST_FLOOD: %d port %u\n", + unreg_mcast_add, port->port_id); + + cpsw_ale_set_unreg_mcast(cpsw->ale, BIT(port->port_id), + unreg_mcast_add); + + return 0; +} + +static int am65_cpsw_port_attr_br_flags_pre_set(struct net_device *netdev, + unsigned long flags) +{ + if (flags & ~(BR_LEARNING | BR_MCAST_FLOOD)) + return -EINVAL; + + return 0; +} + +static int am65_cpsw_port_attr_set(struct net_device *ndev, + const struct switchdev_attr *attr) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + int ret; + + netdev_dbg(ndev, "attr: id %u port: %u\n", attr->id, port->port_id); + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + ret = am65_cpsw_port_attr_br_flags_pre_set(ndev, + attr->u.brport_flags); + break; + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + ret = am65_cpsw_port_stp_state_set(port, attr->u.stp_state); + netdev_dbg(ndev, "stp state: %u\n", attr->u.stp_state); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + ret = am65_cpsw_port_attr_br_flags_set(port, attr->orig_dev, + attr->u.brport_flags); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + return ret; +} + +static u16 am65_cpsw_get_pvid(struct am65_cpsw_port *port) +{ + struct am65_cpsw_common *cpsw = port->common; + struct am65_cpsw_host *host_p = am65_common_get_host(cpsw); + u32 pvid; + + if (port->port_id) + pvid = readl(port->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + else + pvid = readl(host_p->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + + pvid = pvid & 0xfff; + + return pvid; +} + +static void am65_cpsw_set_pvid(struct am65_cpsw_port *port, u16 vid, bool cfi, u32 cos) +{ + struct am65_cpsw_common *cpsw = port->common; + struct am65_cpsw_host *host_p = am65_common_get_host(cpsw); + u32 pvid; + + pvid = vid; + pvid |= cfi ? 
BIT(12) : 0; + pvid |= (cos & 0x7) << 13; + + if (port->port_id) + writel(pvid, port->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); + else + writel(pvid, host_p->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); +} + +static int am65_cpsw_port_vlan_add(struct am65_cpsw_port *port, bool untag, bool pvid, + u16 vid, struct net_device *orig_dev) +{ + bool cpu_port = netif_is_bridge_master(orig_dev); + struct am65_cpsw_common *cpsw = port->common; + int unreg_mcast_mask = 0; + int reg_mcast_mask = 0; + int untag_mask = 0; + int port_mask; + int ret = 0; + u32 flags; + + if (cpu_port) { + port_mask = BIT(HOST_PORT_NUM); + flags = orig_dev->flags; + unreg_mcast_mask = port_mask; + } else { + port_mask = BIT(port->port_id); + flags = port->ndev->flags; + } + + if (flags & IFF_MULTICAST) + reg_mcast_mask = port_mask; + + if (untag) + untag_mask = port_mask; + + ret = cpsw_ale_vlan_add_modify(cpsw->ale, vid, port_mask, untag_mask, + reg_mcast_mask, unreg_mcast_mask); + if (ret) { + netdev_err(port->ndev, "Unable to add vlan\n"); + return ret; + } + + if (cpu_port) + cpsw_ale_add_ucast(cpsw->ale, port->slave.mac_addr, + HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, vid); + if (!pvid) + return ret; + + am65_cpsw_set_pvid(port, vid, 0, 0); + + netdev_dbg(port->ndev, "VID add: %s: vid:%u ports:%X\n", + port->ndev->name, vid, port_mask); + + return ret; +} + +static int am65_cpsw_port_vlan_del(struct am65_cpsw_port *port, u16 vid, + struct net_device *orig_dev) +{ + bool cpu_port = netif_is_bridge_master(orig_dev); + struct am65_cpsw_common *cpsw = port->common; + int port_mask; + int ret = 0; + + if (cpu_port) + port_mask = BIT(HOST_PORT_NUM); + else + port_mask = BIT(port->port_id); + + ret = cpsw_ale_del_vlan(cpsw->ale, vid, port_mask); + if (ret != 0) + return ret; + + /* We don't care for the return value here, error is returned only if + * the unicast entry is not present + */ + if (cpu_port) + cpsw_ale_del_ucast(cpsw->ale, port->slave.mac_addr, + HOST_PORT_NUM, ALE_VLAN, vid); + + if 
(vid == am65_cpsw_get_pvid(port)) + am65_cpsw_set_pvid(port, 0, 0, 0); + + /* We don't care for the return value here, error is returned only if + * the multicast entry is not present + */ + cpsw_ale_del_mcast(cpsw->ale, port->ndev->broadcast, port_mask, + ALE_VLAN, vid); + netdev_dbg(port->ndev, "VID del: %s: vid:%u ports:%X\n", + port->ndev->name, vid, port_mask); + + return ret; +} + +static int am65_cpsw_port_vlans_add(struct am65_cpsw_port *port, + const struct switchdev_obj_port_vlan *vlan) +{ + bool untag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + struct net_device *orig_dev = vlan->obj.orig_dev; + bool cpu_port = netif_is_bridge_master(orig_dev); + bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + + netdev_dbg(port->ndev, "VID add: %s: vid:%u flags:%X\n", + port->ndev->name, vlan->vid, vlan->flags); + + if (cpu_port && !(vlan->flags & BRIDGE_VLAN_INFO_BRENTRY)) + return 0; + + return am65_cpsw_port_vlan_add(port, untag, pvid, vlan->vid, orig_dev); +} + +static int am65_cpsw_port_vlans_del(struct am65_cpsw_port *port, + const struct switchdev_obj_port_vlan *vlan) + +{ + return am65_cpsw_port_vlan_del(port, vlan->vid, vlan->obj.orig_dev); +} + +static int am65_cpsw_port_mdb_add(struct am65_cpsw_port *port, + struct switchdev_obj_port_mdb *mdb) + +{ + struct net_device *orig_dev = mdb->obj.orig_dev; + bool cpu_port = netif_is_bridge_master(orig_dev); + struct am65_cpsw_common *cpsw = port->common; + int port_mask; + int err; + + if (cpu_port) + port_mask = BIT(HOST_PORT_NUM); + else + port_mask = BIT(port->port_id); + + err = cpsw_ale_add_mcast(cpsw->ale, mdb->addr, port_mask, + ALE_VLAN, mdb->vid, 0); + netdev_dbg(port->ndev, "MDB add: %s: vid %u:%pM ports: %X\n", + port->ndev->name, mdb->vid, mdb->addr, port_mask); + + return err; +} + +static int am65_cpsw_port_mdb_del(struct am65_cpsw_port *port, + struct switchdev_obj_port_mdb *mdb) + +{ + struct net_device *orig_dev = mdb->obj.orig_dev; + bool cpu_port = netif_is_bridge_master(orig_dev); + struct 
am65_cpsw_common *cpsw = port->common; + int del_mask; + + if (cpu_port) + del_mask = BIT(HOST_PORT_NUM); + else + del_mask = BIT(port->port_id); + + /* Ignore error as error code is returned only when entry is already removed */ + cpsw_ale_del_mcast(cpsw->ale, mdb->addr, del_mask, + ALE_VLAN, mdb->vid); + netdev_dbg(port->ndev, "MDB del: %s: vid %u:%pM ports: %X\n", + port->ndev->name, mdb->vid, mdb->addr, del_mask); + + return 0; +} + +static int am65_cpsw_port_obj_add(struct net_device *ndev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) +{ + struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + struct switchdev_obj_port_mdb *mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + int err = 0; + + netdev_dbg(ndev, "obj_add: id %u port: %u\n", obj->id, port->port_id); + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = am65_cpsw_port_vlans_add(port, vlan); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + err = am65_cpsw_port_mdb_add(port, mdb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int am65_cpsw_port_obj_del(struct net_device *ndev, + const struct switchdev_obj *obj) +{ + struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); + struct switchdev_obj_port_mdb *mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + int err = 0; + + netdev_dbg(ndev, "obj_del: id %u port: %u\n", obj->id, port->port_id); + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = am65_cpsw_port_vlans_del(port, vlan); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + err = am65_cpsw_port_mdb_del(port, mdb); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static void am65_cpsw_fdb_offload_notify(struct net_device *ndev, + struct switchdev_notifier_fdb_info *rcv) +{ + struct switchdev_notifier_fdb_info info; + 
+ info.addr = rcv->addr; + info.vid = rcv->vid; + info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, + ndev, &info.info, NULL); +} + +static void am65_cpsw_switchdev_event_work(struct work_struct *work) +{ + struct am65_cpsw_switchdev_event_work *switchdev_work = + container_of(work, struct am65_cpsw_switchdev_event_work, work); + struct am65_cpsw_port *port = switchdev_work->port; + struct switchdev_notifier_fdb_info *fdb; + struct am65_cpsw_common *cpsw = port->common; + int port_id = port->port_id; + + rtnl_lock(); + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb = &switchdev_work->fdb_info; + + netdev_dbg(port->ndev, "cpsw_fdb_add: MACID = %pM vid = %u flags = %u %u -- port %d\n", + fdb->addr, fdb->vid, fdb->added_by_user, + fdb->offloaded, port_id); + + if (!fdb->added_by_user) + break; + if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) + port_id = HOST_PORT_NUM; + + cpsw_ale_add_ucast(cpsw->ale, (u8 *)fdb->addr, port_id, + fdb->vid ? ALE_VLAN : 0, fdb->vid); + am65_cpsw_fdb_offload_notify(port->ndev, fdb); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb = &switchdev_work->fdb_info; + + netdev_dbg(port->ndev, "cpsw_fdb_del: MACID = %pM vid = %u flags = %u %u -- port %d\n", + fdb->addr, fdb->vid, fdb->added_by_user, + fdb->offloaded, port_id); + + if (!fdb->added_by_user) + break; + if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) + port_id = HOST_PORT_NUM; + + cpsw_ale_del_ucast(cpsw->ale, (u8 *)fdb->addr, port_id, + fdb->vid ? 
ALE_VLAN : 0, fdb->vid); + break; + default: + break; + } + rtnl_unlock(); + + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(port->ndev); +} + +/* called under rcu_read_lock() */ +static int am65_cpsw_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *ndev = switchdev_notifier_info_to_dev(ptr); + struct am65_cpsw_switchdev_event_work *switchdev_work; + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct switchdev_notifier_fdb_info *fdb_info = ptr; + int err; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + err = switchdev_handle_port_attr_set(ndev, ptr, + am65_cpsw_port_dev_check, + am65_cpsw_port_attr_set); + return notifier_from_errno(err); + } + + if (!am65_cpsw_port_dev_check(ndev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (WARN_ON(!switchdev_work)) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, am65_cpsw_switchdev_event_work); + switchdev_work->port = port; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + memcpy(&switchdev_work->fdb_info, ptr, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + goto err_addr_alloc; + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + dev_hold(ndev); + break; + default: + kfree(switchdev_work); + return NOTIFY_DONE; + } + + queue_work(system_long_wq, &switchdev_work->work); + + return NOTIFY_DONE; + +err_addr_alloc: + kfree(switchdev_work); + return NOTIFY_BAD; +} + +static struct notifier_block cpsw_switchdev_notifier = { + .notifier_call = am65_cpsw_switchdev_event, +}; + +static int am65_cpsw_switchdev_blocking_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + int err; + + switch (event) { + case 
SWITCHDEV_PORT_OBJ_ADD: + err = switchdev_handle_port_obj_add(dev, ptr, + am65_cpsw_port_dev_check, + am65_cpsw_port_obj_add); + return notifier_from_errno(err); + case SWITCHDEV_PORT_OBJ_DEL: + err = switchdev_handle_port_obj_del(dev, ptr, + am65_cpsw_port_dev_check, + am65_cpsw_port_obj_del); + return notifier_from_errno(err); + case SWITCHDEV_PORT_ATTR_SET: + err = switchdev_handle_port_attr_set(dev, ptr, + am65_cpsw_port_dev_check, + am65_cpsw_port_attr_set); + return notifier_from_errno(err); + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block cpsw_switchdev_bl_notifier = { + .notifier_call = am65_cpsw_switchdev_blocking_event, +}; + +int am65_cpsw_switchdev_register_notifiers(struct am65_cpsw_common *cpsw) +{ + int ret = 0; + + ret = register_switchdev_notifier(&cpsw_switchdev_notifier); + if (ret) { + dev_err(cpsw->dev, "register switchdev notifier fail ret:%d\n", + ret); + return ret; + } + + ret = register_switchdev_blocking_notifier(&cpsw_switchdev_bl_notifier); + if (ret) { + dev_err(cpsw->dev, "register switchdev blocking notifier ret:%d\n", + ret); + unregister_switchdev_notifier(&cpsw_switchdev_notifier); + } + + return ret; +} + +void am65_cpsw_switchdev_unregister_notifiers(struct am65_cpsw_common *cpsw) +{ + unregister_switchdev_blocking_notifier(&cpsw_switchdev_bl_notifier); + unregister_switchdev_notifier(&cpsw_switchdev_notifier); +} diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.h b/drivers/net/ethernet/ti/am65-cpsw-switchdev.h new file mode 100644 index 000000000000..a67a7606bc80 --- /dev/null +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com/ + */ + +#ifndef DRIVERS_NET_ETHERNET_TI_AM65_CPSW_SWITCHDEV_H_ +#define DRIVERS_NET_ETHERNET_TI_AM65_CPSW_SWITCHDEV_H_ + +#include <linux/skbuff.h> + +#if IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) +static inline void 
am65_cpsw_nuss_set_offload_fwd_mark(struct sk_buff *skb, bool val) +{ + skb->offload_fwd_mark = val; +} + +int am65_cpsw_switchdev_register_notifiers(struct am65_cpsw_common *cpsw); +void am65_cpsw_switchdev_unregister_notifiers(struct am65_cpsw_common *cpsw); +#else +static inline int am65_cpsw_switchdev_register_notifiers(struct am65_cpsw_common *cpsw) +{ + return -EOPNOTSUPP; +} + +static inline void am65_cpsw_switchdev_unregister_notifiers(struct am65_cpsw_common *cpsw) +{ +} + +static inline void am65_cpsw_nuss_set_offload_fwd_mark(struct sk_buff *skb, bool val) +{ +} + +#endif + +#endif /* DRIVERS_NET_ETHERNET_TI_AM65_CPSW_SWITCHDEV_H_ */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 9db1ea3affbb..dc3f73c3b33e 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1318,8 +1318,11 @@ static int netvsc_receive(struct net_device *ndev, ret = rndis_filter_receive(ndev, net_device, nvchan, data, buflen); - if (unlikely(ret != NVSP_STAT_SUCCESS)) + if (unlikely(ret != NVSP_STAT_SUCCESS)) { + /* Drop incomplete packet */ + nvchan->rsc.cnt = 0; status = NVSP_STAT_FAIL; + } } enq_receive_complete(ndev, net_device, q_idx, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0c2ebe7ac655..123cc9d25f5e 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -577,8 +577,6 @@ static int rndis_filter_receive_data(struct net_device *ndev, return ret; drop: - /* Drop incomplete packet */ - nvchan->rsc.cnt = 0; return NVSP_STAT_FAIL; } diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 511c94f66036..440213646188 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1776,6 +1776,7 @@ static int gsi_channel_setup(struct gsi *gsi) if (!channel->gsi) continue; /* Ignore uninitialized channels */ + ret = -EINVAL; dev_err(gsi->dev, "channel %u not supported by hardware\n", channel_id - 1); channel_id = gsi->channel_count; diff --git 
a/drivers/net/loopback.c b/drivers/net/loopback.c index 24487ec17f8b..a1c77cc00416 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -219,12 +219,6 @@ static __net_init int loopback_net_init(struct net *net) BUG_ON(dev->ifindex != LOOPBACK_IFINDEX); net->loopback_dev = dev; - - /* bring loopback device UP */ - rtnl_lock(); - dev_open(dev, NULL); - rtnl_unlock(); - return 0; out_free_netdev: diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile new file mode 100644 index 000000000000..f71b9f8f3c4f --- /dev/null +++ b/drivers/net/mhi/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MHI_NET) += mhi_net.o + +mhi_net-y := net.o proto_mbim.o diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h new file mode 100644 index 000000000000..12e7407d712a --- /dev/null +++ b/drivers/net/mhi/mhi.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* MHI Network driver - Network over MHI bus + * + * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> + */ + +struct mhi_net_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t rx_dropped; + u64_stats_t rx_length_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; +}; + +struct mhi_net_dev { + struct mhi_device *mdev; + struct net_device *ndev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + const struct mhi_net_proto *proto; + void *proto_data; + struct delayed_work rx_refill; + struct mhi_net_stats stats; + u32 rx_queue_sz; + int msg_enable; +}; + +struct mhi_net_proto { + int (*init)(struct mhi_net_dev *mhi_netdev); + struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); + void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); +}; + +extern const struct mhi_net_proto proto_mbim; diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi/net.c index 
880099193734..f59960876083 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi/net.c @@ -12,31 +12,15 @@ #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> +#include "mhi.h" + #define MHI_NET_MIN_MTU ETH_MIN_MTU #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000 -struct mhi_net_stats { - u64_stats_t rx_packets; - u64_stats_t rx_bytes; - u64_stats_t rx_errors; - u64_stats_t rx_dropped; - u64_stats_t tx_packets; - u64_stats_t tx_bytes; - u64_stats_t tx_errors; - u64_stats_t tx_dropped; - struct u64_stats_sync tx_syncp; - struct u64_stats_sync rx_syncp; -}; - -struct mhi_net_dev { - struct mhi_device *mdev; - struct net_device *ndev; - struct sk_buff *skbagg_head; - struct sk_buff *skbagg_tail; - struct delayed_work rx_refill; - struct mhi_net_stats stats; - u32 rx_queue_sz; +struct mhi_device_info { + const char *netname; + const struct mhi_net_proto *proto; }; static int mhi_ndo_open(struct net_device *ndev) @@ -68,26 +52,35 @@ static int mhi_ndo_stop(struct net_device *ndev) static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + const struct mhi_net_proto *proto = mhi_netdev->proto; struct mhi_device *mdev = mhi_netdev->mdev; int err; + if (proto && proto->tx_fixup) { + skb = proto->tx_fixup(mhi_netdev, skb); + if (unlikely(!skb)) + goto exit_drop; + } + err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); if (unlikely(err)) { net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", ndev->name, err); - - u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); - u64_stats_inc(&mhi_netdev->stats.tx_dropped); - u64_stats_update_end(&mhi_netdev->stats.tx_syncp); - - /* drop the packet */ dev_kfree_skb_any(skb); + goto exit_drop; } if (mhi_queue_is_full(mdev, DMA_TO_DEVICE)) netif_stop_queue(ndev); return NETDEV_TX_OK; + +exit_drop: + u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); + u64_stats_inc(&mhi_netdev->stats.tx_dropped); + 
u64_stats_update_end(&mhi_netdev->stats.tx_syncp); + + return NETDEV_TX_OK; } static void mhi_ndo_get_stats64(struct net_device *ndev, @@ -102,6 +95,7 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped); + stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors); } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); do { @@ -164,6 +158,7 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, struct mhi_result *mhi_res) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + const struct mhi_net_proto *proto = mhi_netdev->proto; struct sk_buff *skb = mhi_res->buf_addr; int free_desc_count; @@ -220,7 +215,10 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - netif_rx(skb); + if (proto && proto->rx) + proto->rx(mhi_netdev, skb); + else + netif_rx(skb); } /* Refill if RX buffers queue becomes low */ @@ -302,14 +300,14 @@ static struct device_type wwan_type = { static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { - const char *netname = (char *)id->driver_data; + const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; struct device *dev = &mhi_dev->dev; struct mhi_net_dev *mhi_netdev; struct net_device *ndev; int err; - ndev = alloc_netdev(sizeof(*mhi_netdev), netname, NET_NAME_PREDICTABLE, - mhi_net_setup); + ndev = alloc_netdev(sizeof(*mhi_netdev), info->netname, + NET_NAME_PREDICTABLE, mhi_net_setup); if (!ndev) return -ENOMEM; @@ -318,6 +316,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; + mhi_netdev->proto = info->proto; SET_NETDEV_DEV(ndev, &mhi_dev->dev); SET_NETDEV_DEVTYPE(ndev, &wwan_type); @@ -337,8 +336,16 @@ static int 
mhi_net_probe(struct mhi_device *mhi_dev, if (err) goto out_err; + if (mhi_netdev->proto) { + err = mhi_netdev->proto->init(mhi_netdev); + if (err) + goto out_err_proto; + } + return 0; +out_err_proto: + unregister_netdev(ndev); out_err: free_netdev(ndev); return err; @@ -358,9 +365,26 @@ static void mhi_net_remove(struct mhi_device *mhi_dev) free_netdev(mhi_netdev->ndev); } +static const struct mhi_device_info mhi_hwip0 = { + .netname = "mhi_hwip%d", +}; + +static const struct mhi_device_info mhi_swip0 = { + .netname = "mhi_swip%d", +}; + +static const struct mhi_device_info mhi_hwip0_mbim = { + .netname = "mhi_mbim%d", + .proto = &proto_mbim, +}; + static const struct mhi_device_id mhi_net_id_table[] = { - { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)"mhi_hwip%d" }, - { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)"mhi_swip%d" }, + /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */ + { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 }, + /* Software data PATH (to modem CPU) */ + { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 }, + /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ + { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim }, {} }; MODULE_DEVICE_TABLE(mhi, mhi_net_id_table); diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c new file mode 100644 index 000000000000..75b5484c40d5 --- /dev/null +++ b/drivers/net/mhi/proto_mbim.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* MHI Network driver - Network over MHI bus + * + * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> + * + * This driver copy some code from cdc_ncm, which is: + * Copyright (C) ST-Ericsson 2010-2012 + * and cdc_mbim, which is: + * Copyright (c) 2012 Smith Micro Software, Inc. 
+ * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> + * + */ + +#include <linux/ethtool.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/mii.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/cdc_ncm.h> + +#include "mhi.h" + +#define MBIM_NDP16_SIGN_MASK 0x00ffffff + +struct mbim_context { + u16 rx_seq; + u16 tx_seq; +}; + +static void __mbim_length_errors_inc(struct mhi_net_dev *dev) +{ + u64_stats_update_begin(&dev->stats.rx_syncp); + u64_stats_inc(&dev->stats.rx_length_errors); + u64_stats_update_end(&dev->stats.rx_syncp); +} + +static void __mbim_errors_inc(struct mhi_net_dev *dev) +{ + u64_stats_update_begin(&dev->stats.rx_syncp); + u64_stats_inc(&dev->stats.rx_errors); + u64_stats_update_end(&dev->stats.rx_syncp); +} + +static int mbim_rx_verify_nth16(struct sk_buff *skb) +{ + struct mhi_net_dev *dev = netdev_priv(skb->dev); + struct mbim_context *ctx = dev->proto_data; + struct usb_cdc_ncm_nth16 *nth16; + int len; + + if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + + sizeof(struct usb_cdc_ncm_ndp16)) { + netif_dbg(dev, rx_err, dev->ndev, "frame too short\n"); + __mbim_length_errors_inc(dev); + return -EINVAL; + } + + nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; + + if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { + netif_dbg(dev, rx_err, dev->ndev, + "invalid NTH16 signature <%#010x>\n", + le32_to_cpu(nth16->dwSignature)); + __mbim_errors_inc(dev); + return -EINVAL; + } + + /* No limit on the block length, except the size of the data pkt */ + len = le16_to_cpu(nth16->wBlockLength); + if (len > skb->len) { + netif_dbg(dev, rx_err, dev->ndev, + "NTB does not fit into the skb %u/%u\n", len, + skb->len); + __mbim_length_errors_inc(dev); + return -EINVAL; + } + + if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && + (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && + !(ctx->rx_seq == 0xffff && 
!le16_to_cpu(nth16->wSequence))) { + netif_dbg(dev, rx_err, dev->ndev, + "sequence number glitch prev=%d curr=%d\n", + ctx->rx_seq, le16_to_cpu(nth16->wSequence)); + } + ctx->rx_seq = le16_to_cpu(nth16->wSequence); + + return le16_to_cpu(nth16->wNdpIndex); +} + +static int mbim_rx_verify_ndp16(struct sk_buff *skb, int ndpoffset) +{ + struct mhi_net_dev *dev = netdev_priv(skb->dev); + struct usb_cdc_ncm_ndp16 *ndp16; + int ret; + + if (ndpoffset + sizeof(struct usb_cdc_ncm_ndp16) > skb->len) { + netif_dbg(dev, rx_err, dev->ndev, "invalid NDP offset <%u>\n", + ndpoffset); + return -EINVAL; + } + + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); + + if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { + netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n", + le16_to_cpu(ndp16->wLength)); + return -EINVAL; + } + + ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) + / sizeof(struct usb_cdc_ncm_dpe16)); + ret--; /* Last entry is always a NULL terminator */ + + if (sizeof(struct usb_cdc_ncm_ndp16) + + ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { + netif_dbg(dev, rx_err, dev->ndev, + "Invalid nframes = %d\n", ret); + return -EINVAL; + } + + return ret; +} + +static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) +{ + struct net_device *ndev = mhi_netdev->ndev; + int ndpoffset; + + if (skb_linearize(skb)) + goto error; + + /* Check NTB header and retrieve first NDP offset */ + ndpoffset = mbim_rx_verify_nth16(skb); + if (ndpoffset < 0) { + net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name); + goto error; + } + + /* Process each NDP */ + while (1) { + struct usb_cdc_ncm_ndp16 *ndp16; + struct usb_cdc_ncm_dpe16 *dpe16; + int nframes, n; + + /* Check NDP header and retrieve number of datagrams */ + nframes = mbim_rx_verify_ndp16(skb, ndpoffset); + if (nframes < 0) { + net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name); + __mbim_length_errors_inc(mhi_netdev); + goto error; + } 
+ + /* Only IP data type supported, no DSS in MHI context */ + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); + if ((ndp16->dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) + != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { + net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name); + __mbim_errors_inc(mhi_netdev); + goto next_ndp; + } + + /* Only primary IP session 0 (0x00) supported for now */ + if (ndp16->dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { + net_err_ratelimited("%s: bad packet session\n", ndev->name); + __mbim_errors_inc(mhi_netdev); + goto next_ndp; + } + + /* de-aggregate and deliver IP packets */ + dpe16 = ndp16->dpe16; + for (n = 0; n < nframes; n++, dpe16++) { + u16 dgram_offset = le16_to_cpu(dpe16->wDatagramIndex); + u16 dgram_len = le16_to_cpu(dpe16->wDatagramLength); + struct sk_buff *skbn; + + if (!dgram_offset || !dgram_len) + break; /* null terminator */ + + skbn = netdev_alloc_skb(ndev, dgram_len); + if (!skbn) + continue; + + skb_put(skbn, dgram_len); + memcpy(skbn->data, skb->data + dgram_offset, dgram_len); + + switch (skbn->data[0] & 0xf0) { + case 0x40: + skbn->protocol = htons(ETH_P_IP); + break; + case 0x60: + skbn->protocol = htons(ETH_P_IPV6); + break; + default: + net_err_ratelimited("%s: unknown protocol\n", + ndev->name); + __mbim_errors_inc(mhi_netdev); + dev_kfree_skb_any(skbn); + continue; + } + + netif_rx(skbn); + } +next_ndp: + /* Other NDP to process? 
*/ + ndpoffset = (int)le16_to_cpu(ndp16->wNextNdpIndex); + if (!ndpoffset) + break; + } + + /* free skb */ + dev_consume_skb_any(skb); + return; +error: + dev_kfree_skb_any(skb); +} + +struct mbim_tx_hdr { + struct usb_cdc_ncm_nth16 nth16; + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16[2]; +} __packed; + +static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev, + struct sk_buff *skb) +{ + struct mbim_context *ctx = mhi_netdev->proto_data; + unsigned int dgram_size = skb->len; + struct usb_cdc_ncm_nth16 *nth16; + struct usb_cdc_ncm_ndp16 *ndp16; + struct mbim_tx_hdr *mbim_hdr; + + /* For now, this is a partial implementation of CDC MBIM, only one NDP + * is sent, containing the IP packet (no aggregation). + */ + + /* Ensure we have enough headroom for crafting MBIM header */ + if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { + dev_kfree_skb_any(skb); + return NULL; + } + + mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); + + /* Fill NTB header */ + nth16 = &mbim_hdr->nth16; + nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); + nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + nth16->wSequence = cpu_to_le16(ctx->tx_seq++); + nth16->wBlockLength = cpu_to_le16(skb->len); + nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + + /* Fill the unique NDP */ + ndp16 = &mbim_hdr->ndp16; + ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + + sizeof(struct usb_cdc_ncm_dpe16) * 2); + ndp16->wNextNdpIndex = 0; + + /* Datagram follows the mbim header */ + ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); + ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); + + /* null termination */ + ndp16->dpe16[1].wDatagramIndex = 0; + ndp16->dpe16[1].wDatagramLength = 0; + + return skb; +} + +static int mbim_init(struct mhi_net_dev *mhi_netdev) +{ + struct net_device *ndev = mhi_netdev->ndev; + + 
mhi_netdev->proto_data = devm_kzalloc(&ndev->dev, + sizeof(struct mbim_context), + GFP_KERNEL); + if (!mhi_netdev->proto_data) + return -ENOMEM; + + ndev->needed_headroom = sizeof(struct mbim_tx_hdr); + + return 0; +} + +const struct mhi_net_proto proto_mbim = { + .init = mbim_init, + .rx = mbim_rx, + .tx_fixup = mbim_tx_fixup, +}; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 816af1f55e2c..dbeb29fa16e8 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1012,23 +1012,25 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, nsim_dev->fw_update_status = true; nsim_dev->fw_update_overwrite_mask = 0; - nsim_dev->fib_data = nsim_fib_create(devlink, extack); - if (IS_ERR(nsim_dev->fib_data)) - return PTR_ERR(nsim_dev->fib_data); - nsim_devlink_param_load_driverinit_values(devlink); err = nsim_dev_dummy_region_init(nsim_dev, devlink); if (err) - goto err_fib_destroy; + return err; err = nsim_dev_traps_init(devlink); if (err) goto err_dummy_region_exit; + nsim_dev->fib_data = nsim_fib_create(devlink, extack); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_traps_exit; + } + err = nsim_dev_health_init(nsim_dev, devlink); if (err) - goto err_traps_exit; + goto err_fib_destroy; err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); if (err) @@ -1043,12 +1045,12 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, err_health_exit: nsim_dev_health_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); err_traps_exit: nsim_dev_traps_exit(devlink); err_dummy_region_exit: nsim_dev_dummy_region_exit(nsim_dev); -err_fib_destroy: - nsim_fib_destroy(devlink, nsim_dev->fib_data); return err; } @@ -1080,15 +1082,9 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - nsim_dev->fib_data = nsim_fib_create(devlink, NULL); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto 
err_resources_unregister; - } - err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) - goto err_fib_destroy; + goto err_resources_unregister; err = devlink_params_register(devlink, nsim_devlink_params, ARRAY_SIZE(nsim_devlink_params)); @@ -1108,9 +1104,15 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_traps_exit; + nsim_dev->fib_data = nsim_fib_create(devlink, NULL); + if (IS_ERR(nsim_dev->fib_data)) { + err = PTR_ERR(nsim_dev->fib_data); + goto err_debugfs_exit; + } + err = nsim_dev_health_init(nsim_dev, devlink); if (err) - goto err_debugfs_exit; + goto err_fib_destroy; err = nsim_bpf_dev_init(nsim_dev); if (err) @@ -1128,6 +1130,8 @@ err_bpf_dev_exit: nsim_bpf_dev_exit(nsim_dev); err_health_exit: nsim_dev_health_exit(nsim_dev); +err_fib_destroy: + nsim_fib_destroy(devlink, nsim_dev->fib_data); err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); err_traps_exit: @@ -1139,8 +1143,6 @@ err_params_unregister: ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); -err_fib_destroy: - nsim_fib_destroy(devlink, nsim_dev->fib_data); err_resources_unregister: devlink_resources_unregister(devlink, NULL); err_devlink_free: @@ -1157,10 +1159,10 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) debugfs_remove(nsim_dev->take_snapshot); nsim_dev_port_del_all(nsim_dev); nsim_dev_health_exit(nsim_dev); + nsim_fib_destroy(devlink, nsim_dev->fib_data); nsim_dev_traps_exit(devlink); nsim_dev_dummy_region_exit(nsim_dev); mutex_destroy(&nsim_dev->port_list_lock); - nsim_fib_destroy(devlink, nsim_dev->fib_data); } void nsim_dev_remove(struct nsim_bus_dev *nsim_bus_dev) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 1779146926a5..46fb414f7ca6 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -26,6 +26,7 @@ #include <net/fib_rules.h> #include <net/net_namespace.h> #include <net/nexthop.h> +#include <linux/debugfs.h> #include "netdevsim.h" @@ -53,6 +54,8 
@@ struct nsim_fib_data { struct work_struct fib_event_work; struct list_head fib_event_queue; spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ + struct dentry *ddir; + bool fail_route_offload; }; struct nsim_fib_rt_key { @@ -303,6 +306,25 @@ nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht, return container_of(fib_rt, struct nsim_fib4_rt, common); } +static void +nsim_fib4_rt_offload_failed_flag_set(struct net *net, + struct fib_entry_notifier_info *fen_info) +{ + u32 *p_dst = (u32 *)&fen_info->dst; + struct fib_rt_info fri; + + fri.fi = fen_info->fi; + fri.tb_id = fen_info->tb_id; + fri.dst = cpu_to_be32(*p_dst); + fri.dst_len = fen_info->dst_len; + fri.tos = fen_info->tos; + fri.type = fen_info->type; + fri.offload = false; + fri.trap = false; + fri.offload_failed = true; + fib_alias_hw_flags_set(net, &fri); +} + static void nsim_fib4_rt_hw_flags_set(struct net *net, const struct nsim_fib4_rt *fib4_rt, bool trap) @@ -319,6 +341,7 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net, fri.type = fib4_rt->type; fri.offload = false; fri.trap = trap; + fri.offload_failed = false; fib_alias_hw_flags_set(net, &fri); } @@ -383,6 +406,15 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data, struct nsim_fib4_rt *fib4_rt, *fib4_rt_old; int err; + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. 
+ */ + msleep(1); + return -EINVAL; + } + fib4_rt = nsim_fib4_rt_create(data, fen_info); if (!fib4_rt) return -ENOMEM; @@ -405,7 +437,7 @@ static void nsim_fib4_rt_remove(struct nsim_fib_data *data, struct nsim_fib4_rt *fib4_rt; fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); - if (WARN_ON_ONCE(!fib4_rt)) + if (!fib4_rt) return; rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, @@ -422,6 +454,11 @@ static int nsim_fib4_event(struct nsim_fib_data *data, switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); + if (err) { + struct net *net = devlink_net(data->devlink); + + nsim_fib4_rt_offload_failed_flag_set(net, fen_info); + } break; case FIB_EVENT_ENTRY_DEL: nsim_fib4_rt_remove(data, fen_info); @@ -481,7 +518,7 @@ static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt, struct nsim_fib6_rt_nh *fib6_rt_nh; fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt); - if (WARN_ON_ONCE(!fib6_rt_nh)) + if (!fib6_rt_nh) return; fib6_rt->nhs--; @@ -563,8 +600,17 @@ static int nsim_fib6_rt_append(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt; int i, err; + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. 
+ */ + msleep(1); + return -EINVAL; + } + fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); - if (WARN_ON_ONCE(!fib6_rt)) + if (!fib6_rt) return -EINVAL; for (i = 0; i < fib6_event->nrt6; i++) { @@ -586,6 +632,26 @@ err_fib6_rt_nh_del: } #if IS_ENABLED(CONFIG_IPV6) +static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, + struct fib6_info **rt_arr, + unsigned int nrt6) + +{ + struct net *net = devlink_net(data->devlink); + int i; + + for (i = 0; i < nrt6; i++) + fib6_info_hw_flags_set(net, rt_arr[i], false, false, true); +} +#else +static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, + struct fib6_info **rt_arr, + unsigned int nrt6) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, const struct nsim_fib6_rt *fib6_rt, bool trap) @@ -594,7 +660,7 @@ static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, struct nsim_fib6_rt_nh *fib6_rt_nh; list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) - fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap); + fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap, false); } #else static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, @@ -666,6 +732,15 @@ static int nsim_fib6_rt_insert(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt, *fib6_rt_old; int err; + if (data->fail_route_offload) { + /* For testing purposes, user set debugfs fail_route_offload + * value to true. Simulate hardware programming latency and then + * fail. 
+ */ + msleep(1); + return -EINVAL; + } + fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr, fib6_event->nrt6); if (IS_ERR(fib6_rt)) @@ -763,7 +838,7 @@ static int nsim_fib6_event(struct nsim_fib_data *data, struct nsim_fib6_event *fib6_event, unsigned long event) { - int err = 0; + int err; if (fib6_event->rt_arr[0]->fib6_src.plen) return 0; @@ -771,9 +846,13 @@ static int nsim_fib6_event(struct nsim_fib_data *data, switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib6_rt_insert(data, fib6_event); + if (err) + goto err_rt_offload_failed_flag_set; break; case FIB_EVENT_ENTRY_APPEND: err = nsim_fib6_rt_append(data, fib6_event); + if (err) + goto err_rt_offload_failed_flag_set; break; case FIB_EVENT_ENTRY_DEL: nsim_fib6_rt_remove(data, fib6_event); @@ -782,6 +861,11 @@ static int nsim_fib6_event(struct nsim_fib_data *data, break; } + return 0; + +err_rt_offload_failed_flag_set: + nsim_fib6_rt_offload_failed_flag_set(data, fib6_event->rt_arr, + fib6_event->nrt6); return err; } @@ -1289,10 +1373,29 @@ static void nsim_fib_event_work(struct work_struct *work) mutex_unlock(&data->fib_lock); } +static int +nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) +{ + data->ddir = debugfs_create_dir("fib", nsim_dev->ddir); + if (IS_ERR(data->ddir)) + return PTR_ERR(data->ddir); + + data->fail_route_offload = false; + debugfs_create_bool("fail_route_offload", 0600, data->ddir, + &data->fail_route_offload); + return 0; +} + +static void nsim_fib_debugfs_exit(struct nsim_fib_data *data) +{ + debugfs_remove_recursive(data->ddir); +} + struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, struct netlink_ext_ack *extack) { struct nsim_fib_data *data; + struct nsim_dev *nsim_dev; int err; data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -1300,10 +1403,15 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, return ERR_PTR(-ENOMEM); data->devlink = devlink; - err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + 
nsim_dev = devlink_priv(devlink); + err = nsim_fib_debugfs_init(data, nsim_dev); if (err) goto err_data_free; + err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); + if (err) + goto err_debugfs_exit; + mutex_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); @@ -1364,6 +1472,8 @@ err_rhashtable_nexthop_destroy: rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, data); mutex_destroy(&data->fib_lock); +err_debugfs_exit: + nsim_fib_debugfs_exit(data); err_data_free: kfree(data); return ERR_PTR(err); @@ -1391,5 +1501,6 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) WARN_ON_ONCE(!list_empty(&data->fib_event_queue)); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); mutex_destroy(&data->fib_lock); + nsim_fib_debugfs_exit(data); kfree(data); } diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 8a4ec3222168..0472b3470c59 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -361,96 +361,6 @@ static int bcm54811_config_init(struct phy_device *phydev) return err; } -static int bcm5482_config_init(struct phy_device *phydev) -{ - int err, reg; - - err = bcm54xx_config_init(phydev); - - if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) { - /* - * Enable secondary SerDes and its use as an LED source - */ - reg = bcm_phy_read_shadow(phydev, BCM5482_SHD_SSD); - bcm_phy_write_shadow(phydev, BCM5482_SHD_SSD, - reg | - BCM5482_SHD_SSD_LEDM | - BCM5482_SHD_SSD_EN); - - /* - * Enable SGMII slave mode and auto-detection - */ - reg = BCM5482_SSD_SGMII_SLAVE | MII_BCM54XX_EXP_SEL_SSD; - err = bcm_phy_read_exp(phydev, reg); - if (err < 0) - return err; - err = bcm_phy_write_exp(phydev, reg, err | - BCM5482_SSD_SGMII_SLAVE_EN | - BCM5482_SSD_SGMII_SLAVE_AD); - if (err < 0) - return err; - - /* - * Disable secondary SerDes powerdown - */ - reg = BCM5482_SSD_1000BX_CTL | MII_BCM54XX_EXP_SEL_SSD; - err = 
bcm_phy_read_exp(phydev, reg); - if (err < 0) - return err; - err = bcm_phy_write_exp(phydev, reg, - err & ~BCM5482_SSD_1000BX_CTL_PWRDOWN); - if (err < 0) - return err; - - /* - * Select 1000BASE-X register set (primary SerDes) - */ - reg = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE); - bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE, - reg | BCM54XX_SHD_MODE_1000BX); - - /* - * LED1=ACTIVITYLED, LED3=LINKSPD[2] - * (Use LED1 as secondary SerDes ACTIVITY LED) - */ - bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, - BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_ACTIVITYLED) | - BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_LINKSPD2)); - - /* - * Auto-negotiation doesn't seem to work quite right - * in this mode, so we disable it and force it to the - * right speed/duplex setting. Only 'link status' - * is important. - */ - phydev->autoneg = AUTONEG_DISABLE; - phydev->speed = SPEED_1000; - phydev->duplex = DUPLEX_FULL; - } - - return err; -} - -static int bcm5482_read_status(struct phy_device *phydev) -{ - int err; - - err = genphy_read_status(phydev); - - if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) { - /* - * Only link status matters for 1000Base-X mode, so force - * 1000 Mbit/s full-duplex status - */ - if (phydev->link) { - phydev->speed = SPEED_1000; - phydev->duplex = DUPLEX_FULL; - } - } - - return err; -} - static int bcm5481_config_aneg(struct phy_device *phydev) { struct device_node *np = phydev->mdio.dev.of_node; @@ -500,6 +410,8 @@ static int bcm54616s_probe(struct phy_device *phydev) */ if (!(val & BCM54616S_100FX_MODE)) phydev->dev_flags |= PHY_BCM_FLAGS_MODE_1000BX; + + phydev->port = PORT_FIBRE; } return 0; @@ -800,8 +712,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM5482", /* PHY_GBIT_FEATURES */ - .config_init = bcm5482_config_init, - .read_status = bcm5482_read_status, + .config_init = bcm54xx_config_init, .config_intr = bcm_phy_config_intr, .handle_interrupt = bcm_phy_handle_interrupt, }, { diff --git 
a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index fff371ca1086..be1224b4447b 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -554,6 +554,9 @@ static int dp83822_probe(struct phy_device *phydev) dp83822_of_init(phydev); + if (dp83822->fx_enabled) + phydev->port = PORT_FIBRE; + return 0; } diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index b30bc142d82e..755220c6451f 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -855,6 +855,10 @@ static int dp83869_probe(struct phy_device *phydev) if (ret) return ret; + if (dp83869->mode == DP83869_RGMII_100_BASE || + dp83869->mode == DP83869_RGMII_1000_BASE) + phydev->port = PORT_FIBRE; + return dp83869_config_init(phydev); } diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index b632947cbcdf..4e15d4d02488 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -37,16 +37,35 @@ MODULE_LICENSE("GPL"); #define IP1001_SPEC_CTRL_STATUS_2 20 /* IP1001 Spec. 
Control Reg 2 */ #define IP1001_APS_ON 11 /* IP1001 APS Mode bit */ #define IP101A_G_APS_ON BIT(1) /* IP101A/G APS Mode bit */ +#define IP101A_G_AUTO_MDIX_DIS BIT(11) #define IP101A_G_IRQ_CONF_STATUS 0x11 /* Conf Info IRQ & Status Reg */ #define IP101A_G_IRQ_PIN_USED BIT(15) /* INTR pin used */ #define IP101A_G_IRQ_ALL_MASK BIT(11) /* IRQ's inactive */ #define IP101A_G_IRQ_SPEED_CHANGE BIT(2) #define IP101A_G_IRQ_DUPLEX_CHANGE BIT(1) #define IP101A_G_IRQ_LINK_CHANGE BIT(0) +#define IP101A_G_PHY_STATUS 18 +#define IP101A_G_MDIX BIT(9) +#define IP101A_G_PHY_SPEC_CTRL 30 +#define IP101A_G_FORCE_MDIX BIT(3) +#define IP101G_PAGE_CONTROL 0x14 +#define IP101G_PAGE_CONTROL_MASK GENMASK(4, 0) #define IP101G_DIGITAL_IO_SPEC_CTRL 0x1d #define IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32 BIT(2) +#define IP101G_DEFAULT_PAGE 16 + +#define IP101G_P1_CNT_CTRL 17 +#define CNT_CTRL_RX_EN BIT(13) +#define IP101G_P8_CNT_CTRL 17 +#define CNT_CTRL_RDCLR_EN BIT(15) +#define IP101G_CNT_REG 18 + +#define IP175C_PHY_ID 0x02430d80 +#define IP1001_PHY_ID 0x02430d90 +#define IP101A_PHY_ID 0x02430c54 + /* The 32-pin IP101GR package can re-configure the mode of the RXER/INTR_32 pin * (pin number 21). The hardware default is RXER (receive error) mode. But it * can be configured to interrupt mode manually. 
@@ -57,8 +76,19 @@ enum ip101gr_sel_intr32 { IP101GR_SEL_INTR32_RXER, }; +struct ip101g_hw_stat { + const char *name; + int page; +}; + +static struct ip101g_hw_stat ip101g_hw_stats[] = { + { "phy_crc_errors", 1 }, + { "phy_symbol_errors", 11, }, +}; + struct ip101a_g_phy_priv { enum ip101gr_sel_intr32 sel_intr32; + u64 stats[ARRAY_SIZE(ip101g_hw_stats)]; }; static int ip175c_config_init(struct phy_device *phydev) @@ -116,36 +146,10 @@ static int ip175c_config_init(struct phy_device *phydev) return 0; } -static int ip1xx_reset(struct phy_device *phydev) -{ - int bmcr; - - /* Software Reset PHY */ - bmcr = phy_read(phydev, MII_BMCR); - if (bmcr < 0) - return bmcr; - bmcr |= BMCR_RESET; - bmcr = phy_write(phydev, MII_BMCR, bmcr); - if (bmcr < 0) - return bmcr; - - do { - bmcr = phy_read(phydev, MII_BMCR); - if (bmcr < 0) - return bmcr; - } while (bmcr & BMCR_RESET); - - return 0; -} - static int ip1001_config_init(struct phy_device *phydev) { int c; - c = ip1xx_reset(phydev); - if (c < 0) - return c; - /* Enable Auto Power Saving mode */ c = phy_read(phydev, IP1001_SPEC_CTRL_STATUS_2); if (c < 0) @@ -228,30 +232,30 @@ static int ip101a_g_probe(struct phy_device *phydev) return 0; } -static int ip101a_g_config_init(struct phy_device *phydev) +static int ip101a_g_config_intr_pin(struct phy_device *phydev) { struct ip101a_g_phy_priv *priv = phydev->priv; - int err, c; + int oldpage, err = 0; - c = ip1xx_reset(phydev); - if (c < 0) - return c; + oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); + if (oldpage < 0) + return oldpage; /* configure the RXER/INTR_32 pin of the 32-pin IP101GR if needed: */ switch (priv->sel_intr32) { case IP101GR_SEL_INTR32_RXER: - err = phy_modify(phydev, IP101G_DIGITAL_IO_SPEC_CTRL, - IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32, 0); + err = __phy_modify(phydev, IP101G_DIGITAL_IO_SPEC_CTRL, + IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32, 0); if (err < 0) - return err; + goto out; break; case IP101GR_SEL_INTR32_INTR: - err = phy_modify(phydev, 
IP101G_DIGITAL_IO_SPEC_CTRL, - IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32, - IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32); + err = __phy_modify(phydev, IP101G_DIGITAL_IO_SPEC_CTRL, + IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32, + IP101G_DIGITAL_IO_SPEC_CTRL_SEL_INTR32); if (err < 0) - return err; + goto out; break; default: @@ -265,17 +269,135 @@ static int ip101a_g_config_init(struct phy_device *phydev) break; } +out: + return phy_restore_page(phydev, oldpage, err); +} + +static int ip101a_config_init(struct phy_device *phydev) +{ + int ret; + /* Enable Auto Power Saving mode */ - c = phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); - c |= IP101A_G_APS_ON; + ret = phy_set_bits(phydev, IP10XX_SPEC_CTRL_STATUS, IP101A_G_APS_ON); + if (ret) + return ret; + + return ip101a_g_config_intr_pin(phydev); +} + +static int ip101g_config_init(struct phy_device *phydev) +{ + int ret; + + /* Enable the PHY counters */ + ret = phy_modify_paged(phydev, 1, IP101G_P1_CNT_CTRL, + CNT_CTRL_RX_EN, CNT_CTRL_RX_EN); + if (ret) + return ret; + + /* Clear error counters on read */ + ret = phy_modify_paged(phydev, 8, IP101G_P8_CNT_CTRL, + CNT_CTRL_RDCLR_EN, CNT_CTRL_RDCLR_EN); + if (ret) + return ret; + + return ip101a_g_config_intr_pin(phydev); +} + +static int ip101a_g_read_status(struct phy_device *phydev) +{ + int oldpage, ret, stat1, stat2; + + ret = genphy_read_status(phydev); + if (ret) + return ret; + + oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); + if (oldpage < 0) + return oldpage; + + ret = __phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); + if (ret < 0) + goto out; + stat1 = ret; + + ret = __phy_read(phydev, IP101A_G_PHY_SPEC_CTRL); + if (ret < 0) + goto out; + stat2 = ret; + + if (stat1 & IP101A_G_AUTO_MDIX_DIS) { + if (stat2 & IP101A_G_FORCE_MDIX) + phydev->mdix_ctrl = ETH_TP_MDI_X; + else + phydev->mdix_ctrl = ETH_TP_MDI; + } else { + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + } + + if (stat2 & IP101A_G_MDIX) + phydev->mdix = ETH_TP_MDI_X; + else + phydev->mdix = ETH_TP_MDI; + + ret = 0; + 
+out: + return phy_restore_page(phydev, oldpage, ret); +} + +static int ip101a_g_config_mdix(struct phy_device *phydev) +{ + u16 ctrl = 0, ctrl2 = 0; + int oldpage, ret; + + switch (phydev->mdix_ctrl) { + case ETH_TP_MDI: + ctrl = IP101A_G_AUTO_MDIX_DIS; + break; + case ETH_TP_MDI_X: + ctrl = IP101A_G_AUTO_MDIX_DIS; + ctrl2 = IP101A_G_FORCE_MDIX; + break; + case ETH_TP_MDI_AUTO: + break; + default: + return 0; + } + + oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE); + if (oldpage < 0) + return oldpage; - return phy_write(phydev, IP10XX_SPEC_CTRL_STATUS, c); + ret = __phy_modify(phydev, IP10XX_SPEC_CTRL_STATUS, + IP101A_G_AUTO_MDIX_DIS, ctrl); + if (ret) + goto out; + + ret = __phy_modify(phydev, IP101A_G_PHY_SPEC_CTRL, + IP101A_G_FORCE_MDIX, ctrl2); + +out: + return phy_restore_page(phydev, oldpage, ret); +} + +static int ip101a_g_config_aneg(struct phy_device *phydev) +{ + int ret; + + ret = ip101a_g_config_mdix(phydev); + if (ret) + return ret; + + return genphy_config_aneg(phydev); } static int ip101a_g_ack_interrupt(struct phy_device *phydev) { - int err = phy_read(phydev, IP101A_G_IRQ_CONF_STATUS); + int err; + err = phy_read_paged(phydev, IP101G_DEFAULT_PAGE, + IP101A_G_IRQ_CONF_STATUS); if (err < 0) return err; @@ -294,10 +416,12 @@ static int ip101a_g_config_intr(struct phy_device *phydev) /* INTR pin used: Speed/link/duplex will cause an interrupt */ val = IP101A_G_IRQ_PIN_USED; - err = phy_write(phydev, IP101A_G_IRQ_CONF_STATUS, val); + err = phy_write_paged(phydev, IP101G_DEFAULT_PAGE, + IP101A_G_IRQ_CONF_STATUS, val); } else { val = IP101A_G_IRQ_ALL_MASK; - err = phy_write(phydev, IP101A_G_IRQ_CONF_STATUS, val); + err = phy_write_paged(phydev, IP101G_DEFAULT_PAGE, + IP101A_G_IRQ_CONF_STATUS, val); if (err) return err; @@ -311,7 +435,8 @@ static irqreturn_t ip101a_g_handle_interrupt(struct phy_device *phydev) { int irq_status; - irq_status = phy_read(phydev, IP101A_G_IRQ_CONF_STATUS); + irq_status = phy_read_paged(phydev, IP101G_DEFAULT_PAGE, + 
IP101A_G_IRQ_CONF_STATUS); if (irq_status < 0) { phy_error(phydev); return IRQ_NONE; @@ -327,34 +452,171 @@ static irqreturn_t ip101a_g_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } +/* The IP101A doesn't really have a page register. We just pretend to have one + * so we can use the paged versions of the callbacks of the IP101G. + */ +static int ip101a_read_page(struct phy_device *phydev) +{ + return IP101G_DEFAULT_PAGE; +} + +static int ip101a_write_page(struct phy_device *phydev, int page) +{ + WARN_ONCE(page != IP101G_DEFAULT_PAGE, "wrong page selected\n"); + + return 0; +} + +static int ip101g_read_page(struct phy_device *phydev) +{ + return __phy_read(phydev, IP101G_PAGE_CONTROL); +} + +static int ip101g_write_page(struct phy_device *phydev, int page) +{ + return __phy_write(phydev, IP101G_PAGE_CONTROL, page); +} + +static int ip101a_g_has_page_register(struct phy_device *phydev) +{ + int oldval, val, ret; + + oldval = phy_read(phydev, IP101G_PAGE_CONTROL); + if (oldval < 0) + return oldval; + + ret = phy_write(phydev, IP101G_PAGE_CONTROL, 0xffff); + if (ret) + return ret; + + val = phy_read(phydev, IP101G_PAGE_CONTROL); + if (val < 0) + return val; + + ret = phy_write(phydev, IP101G_PAGE_CONTROL, oldval); + if (ret) + return ret; + + return val == IP101G_PAGE_CONTROL_MASK; +} + +static int ip101a_g_match_phy_device(struct phy_device *phydev, bool ip101a) +{ + int ret; + + if (phydev->phy_id != IP101A_PHY_ID) + return 0; + + /* The IP101A and the IP101G share the same PHY identifier.The IP101G + * seems to be a successor of the IP101A and implements more functions. + * Amongst other things there is a page select register, which is not + * available on the IP101A. Use this to distinguish these two. 
+ */ + ret = ip101a_g_has_page_register(phydev); + if (ret < 0) + return ret; + + return ip101a == !ret; +} + +static int ip101a_match_phy_device(struct phy_device *phydev) +{ + return ip101a_g_match_phy_device(phydev, true); +} + +static int ip101g_match_phy_device(struct phy_device *phydev) +{ + return ip101a_g_match_phy_device(phydev, false); +} + +static int ip101g_get_sset_count(struct phy_device *phydev) +{ + return ARRAY_SIZE(ip101g_hw_stats); +} + +static void ip101g_get_strings(struct phy_device *phydev, u8 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ip101g_hw_stats); i++) + strscpy(data + i * ETH_GSTRING_LEN, + ip101g_hw_stats[i].name, ETH_GSTRING_LEN); +} + +static u64 ip101g_get_stat(struct phy_device *phydev, int i) +{ + struct ip101g_hw_stat stat = ip101g_hw_stats[i]; + struct ip101a_g_phy_priv *priv = phydev->priv; + int val; + u64 ret; + + val = phy_read_paged(phydev, stat.page, IP101G_CNT_REG); + if (val < 0) { + ret = U64_MAX; + } else { + priv->stats[i] += val; + ret = priv->stats[i]; + } + + return ret; +} + +static void ip101g_get_stats(struct phy_device *phydev, + struct ethtool_stats *stats, u64 *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ip101g_hw_stats); i++) + data[i] = ip101g_get_stat(phydev, i); +} + static struct phy_driver icplus_driver[] = { { - .phy_id = 0x02430d80, + PHY_ID_MATCH_MODEL(IP175C_PHY_ID), .name = "ICPlus IP175C", - .phy_id_mask = 0x0ffffff0, /* PHY_BASIC_FEATURES */ - .config_init = &ip175c_config_init, - .config_aneg = &ip175c_config_aneg, - .read_status = &ip175c_read_status, + .config_init = ip175c_config_init, + .config_aneg = ip175c_config_aneg, + .read_status = ip175c_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = 0x02430d90, + PHY_ID_MATCH_MODEL(IP1001_PHY_ID), .name = "ICPlus IP1001", - .phy_id_mask = 0x0ffffff0, /* PHY_GBIT_FEATURES */ - .config_init = &ip1001_config_init, + .config_init = ip1001_config_init, + .soft_reset = genphy_soft_reset, .suspend = 
genphy_suspend, .resume = genphy_resume, }, { - .phy_id = 0x02430c54, - .name = "ICPlus IP101A/G", - .phy_id_mask = 0x0ffffff0, - /* PHY_BASIC_FEATURES */ + .name = "ICPlus IP101A", + .match_phy_device = ip101a_match_phy_device, + .probe = ip101a_g_probe, + .read_page = ip101a_read_page, + .write_page = ip101a_write_page, + .config_intr = ip101a_g_config_intr, + .handle_interrupt = ip101a_g_handle_interrupt, + .config_init = ip101a_config_init, + .config_aneg = ip101a_g_config_aneg, + .read_status = ip101a_g_read_status, + .soft_reset = genphy_soft_reset, + .suspend = genphy_suspend, + .resume = genphy_resume, +}, { + .name = "ICPlus IP101G", + .match_phy_device = ip101g_match_phy_device, .probe = ip101a_g_probe, + .read_page = ip101g_read_page, + .write_page = ip101g_write_page, .config_intr = ip101a_g_config_intr, .handle_interrupt = ip101a_g_handle_interrupt, - .config_init = &ip101a_g_config_init, + .config_init = ip101g_config_init, + .config_aneg = ip101a_g_config_aneg, + .read_status = ip101a_g_read_status, + .soft_reset = genphy_soft_reset, + .get_sset_count = ip101g_get_sset_count, + .get_strings = ip101g_get_strings, + .get_stats = ip101g_get_stats, .suspend = genphy_suspend, .resume = genphy_resume, } }; @@ -362,9 +624,9 @@ static struct phy_driver icplus_driver[] = { module_phy_driver(icplus_driver); static struct mdio_device_id __maybe_unused icplus_tbl[] = { - { 0x02430d80, 0x0ffffff0 }, - { 0x02430d90, 0x0ffffff0 }, - { 0x02430c54, 0x0ffffff0 }, + { PHY_ID_MATCH_MODEL(IP175C_PHY_ID) }, + { PHY_ID_MATCH_MODEL(IP1001_PHY_ID) }, + { PHY_ID_MATCH_EXACT(IP101A_PHY_ID) }, { } }; diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index 0ee23d29c0d4..bde3356a2f86 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -292,6 +292,7 @@ static int lxt973_probe(struct phy_device *phydev) phy_write(phydev, MII_BMCR, val); /* Remember that the port is in fiber mode. 
*/ phydev->priv = lxt973_probe; + phydev->port = PORT_FIBRE; } else { phydev->priv = NULL; } diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 620052c023a5..3238d0fbf437 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1552,6 +1552,7 @@ static int marvell_read_status_page(struct phy_device *phydev, int page) phydev->asym_pause = 0; phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; + phydev->port = fiber ? PORT_FIBRE : PORT_TP; if (phydev->autoneg == AUTONEG_ENABLE) err = marvell_read_status_page_an(phydev, fiber, status); @@ -2852,7 +2853,6 @@ static struct phy_driver marvell_drivers[] = { .probe = marvell_probe, .config_init = m88e1145_config_init, .config_aneg = m88e1101_config_aneg, - .read_status = genphy_read_status, .config_intr = marvell_config_intr, .handle_interrupt = marvell_handle_interrupt, .resume = genphy_resume, diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 1901ba277413..b1bb9b8e1e4e 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -631,6 +631,7 @@ static int mv3310_read_status_10gbaser(struct phy_device *phydev) phydev->link = 1; phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; + phydev->port = PORT_FIBRE; return 0; } @@ -690,6 +691,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev) phydev->duplex = cssr1 & MV_PCS_CSSR1_DUPLEX_FULL ? DUPLEX_FULL : DUPLEX_HALF; + phydev->port = PORT_TP; phydev->mdix = cssr1 & MV_PCS_CSSR1_MDIX ? 
ETH_TP_MDI_X : ETH_TP_MDI; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 39c7c786a912..7ec6f70d6a82 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -341,14 +341,19 @@ static int kszphy_config_init(struct phy_device *phydev) return kszphy_config_reset(phydev); } +static int ksz8041_fiber_mode(struct phy_device *phydev) +{ + struct device_node *of_node = phydev->mdio.dev.of_node; + + return of_property_read_bool(of_node, "micrel,fiber-mode"); +} + static int ksz8041_config_init(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - struct device_node *of_node = phydev->mdio.dev.of_node; - /* Limit supported and advertised modes in fiber mode */ - if (of_property_read_bool(of_node, "micrel,fiber-mode")) { + if (ksz8041_fiber_mode(phydev)) { phydev->dev_flags |= MICREL_PHY_FXEN; linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); @@ -1176,6 +1181,9 @@ static int kszphy_probe(struct phy_device *phydev) } } + if (ksz8041_fiber_mode(phydev)) + phydev->port = PORT_FIBRE; + /* Support legacy board-file configuration */ if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) { priv->rmii_ref_clk_sel = true; @@ -1368,7 +1376,6 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9131_config_init, - .read_status = genphy_read_status, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .get_sset_count = kszphy_get_sset_count, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 9cb7e4dbf8f4..fdb914b5b857 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -308,7 +308,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, if (phydev->interface == PHY_INTERFACE_MODE_MOCA) cmd->base.port = PORT_BNC; else - cmd->base.port = PORT_MII; + cmd->base.port = phydev->port; cmd->base.transceiver = phy_is_internal(phydev) ? 
XCVR_INTERNAL : XCVR_EXTERNAL; cmd->base.phy_address = phydev->mdio.addr; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8447e56ba572..30a20a29ae05 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -606,6 +606,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, dev->pause = 0; dev->asym_pause = 0; dev->link = 0; + dev->port = PORT_TP; dev->interface = PHY_INTERFACE_MODE_GMII; dev->autoneg = AUTONEG_ENABLE; @@ -1403,6 +1404,14 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phydev->state = PHY_READY; + /* Port is set to PORT_TP by default and the actual PHY driver will set + * it to different value depending on the PHY configuration. If we have + * the generic PHY driver we can't figure it out, thus set the old + * legacy PORT_MII value. + */ + if (using_genphy) + phydev->port = PORT_MII; + /* Initial carrier state is off as the phy is about to be * (re)initialized. */ diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ff4aa35979a1..8e3a28ba6b28 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1090,10 +1090,9 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return -ENOLINK; } ret = 0; - u = tap->dev->type; + dev_get_mac_address(&sa, dev_net(tap->dev), tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || - copy_to_user(&ifr->ifr_hwaddr.sa_data, tap->dev->dev_addr, ETH_ALEN) || - put_user(u, &ifr->ifr_hwaddr.sa_family)) + copy_to_user(&ifr->ifr_hwaddr, &sa, sizeof(sa))) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); @@ -1108,7 +1107,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address(tap->dev, &sa, NULL); + ret = dev_set_mac_address_user(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 62690baa19bc..fc86da7f1628 100644 --- 
a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3107,15 +3107,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: /* Get hw address */ - memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); - ifr.ifr_hwaddr.sa_family = tun->dev->type; + dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c8b2b60d2183..6c3d8c2abd38 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1392,6 +1392,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */ + {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)}, /* Cinterion MV31 RmNet */ {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index bb164805804e..4aaa6388b9ee 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -169,11 +169,11 @@ static int x25_open(struct net_device *dev) result = lapb_register(dev, &cb); if (result != LAPB_OK) - return result; + return -ENOMEM; result = lapb_getparms(dev, ¶ms); if (result != LAPB_OK) - return result; + return -EINVAL; if (state(hdlc)->settings.dce) params.mode = params.mode | LAPB_DCE; @@ -188,7 +188,7 @@ static int x25_open(struct net_device *dev) result = 
lapb_setparms(dev, ¶ms); if (result != LAPB_OK) - return result; + return -EINVAL; return 0; } diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index a84bb9b6573f..e150d82eddb6 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -21,11 +21,9 @@ config ATH9K_BTCOEX_SUPPORT config ATH9K tristate "Atheros 802.11n wireless cards support" depends on MAC80211 && HAS_DMA + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH9K_HW select ATH9K_COMMON - imply NEW_LEDS - imply LEDS_CLASS - imply MAC80211_LEDS help This module adds support for wireless adapters based on Atheros IEEE 802.11n AR5008, AR9001 and AR9002 family @@ -176,11 +174,9 @@ config ATH9K_PCI_NO_EEPROM config ATH9K_HTC tristate "Atheros HTC based wireless cards support" depends on USB && MAC80211 + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH9K_HW select ATH9K_COMMON - imply NEW_LEDS - imply LEDS_CLASS - imply MAC80211_LEDS help Support for Atheros HTC based cards. 
Chipsets supported: AR9271 diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 0a6a508fbe05..19098b852d0a 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -513,15 +513,17 @@ static void mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, int len, bool more) { - struct page *page = virt_to_head_page(data); - int offset = data - page_address(page); struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { - offset += q->buf_offset; + struct page *page = virt_to_head_page(data); + int offset = data - page_address(page) + q->buf_offset; + skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, q->buf_size); + } else { + skb_free_frag(data); } if (more) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 08b0e3d0b7eb..193b723fe3bd 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -162,13 +162,15 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; int old; + bool has_rx, has_tx; old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); WARN(old, "Interrupt while EOI pending\n"); - /* Use bitwise or as we need to call both functions. 
*/ - if ((!xenvif_handle_tx_interrupt(queue) | - !xenvif_handle_rx_interrupt(queue))) { + has_tx = xenvif_handle_tx_interrupt(queue); + has_rx = xenvif_handle_rx_interrupt(queue); + + if (!has_rx && !has_tx) { atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); } @@ -628,13 +630,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, unsigned int evtchn) { struct net_device *dev = vif->dev; + struct xenbus_device *xendev = xenvif_to_xenbus_device(vif); void *addr; struct xen_netif_ctrl_sring *shared; RING_IDX rsp_prod, req_prod; int err; - err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), - &ring_ref, 1, &addr); + err = xenbus_map_ring_valloc(xendev, &ring_ref, 1, &addr); if (err) goto err; @@ -648,7 +650,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl)) goto err_unmap; - err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(xendev, evtchn); if (err < 0) goto err_unmap; @@ -671,8 +673,7 @@ err_deinit: vif->ctrl_irq = 0; err_unmap: - xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), - vif->ctrl.sring); + xenbus_unmap_ring_vfree(xendev, vif->ctrl.sring); vif->ctrl.sring = NULL; err: @@ -717,6 +718,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, unsigned int tx_evtchn, unsigned int rx_evtchn) { + struct xenbus_device *dev = xenvif_to_xenbus_device(queue->vif); struct task_struct *task; int err; @@ -753,7 +755,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ err = bind_interdomain_evtchn_to_irqhandler_lateeoi( - queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, + dev, tx_evtchn, xenvif_interrupt, 0, queue->name, queue); if (err < 0) goto err; @@ -764,7 +766,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", 
queue->name); err = bind_interdomain_evtchn_to_irqhandler_lateeoi( - queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, + dev, tx_evtchn, xenvif_tx_interrupt, 0, queue->tx_irq_name, queue); if (err < 0) goto err; @@ -774,7 +776,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); err = bind_interdomain_evtchn_to_irqhandler_lateeoi( - queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, + dev, rx_evtchn, xenvif_rx_interrupt, 0, queue->rx_irq_name, queue); if (err < 0) goto err; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index b8febe1d1bfd..accc991d153f 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -38,10 +38,15 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) RING_IDX prod, cons; struct sk_buff *skb; int needed; + unsigned long flags; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); skb = skb_peek(&queue->rx_queue); - if (!skb) + if (!skb) { + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); return false; + } needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); if (skb_is_gso(skb)) @@ -49,6 +54,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) if (skb->sw_hash) needed++; + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + do { prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 807eae04c1e3..1cba8f69d3ae 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -276,7 +276,6 @@ static int st_nci_hci_apdu_reader_event_received(struct nci_dev *ndev, u8 event, struct sk_buff *skb) { - int r = 0; struct st_nci_info *info = nci_get_drvdata(ndev); pr_debug("apdu reader gate event: %x\n", event); @@ -298,7 +297,7 @@ static int st_nci_hci_apdu_reader_event_received(struct nci_dev *ndev, } kfree_skb(skb); - return r; + return 0; } /* diff --git a/drivers/nvdimm/dimm_devs.c 
b/drivers/nvdimm/dimm_devs.c index b59032e0859b..9d208570d059 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -335,16 +335,16 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(state); -static ssize_t available_slots_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t __available_slots_show(struct nvdimm_drvdata *ndd, char *buf) { - struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + struct device *dev; ssize_t rc; u32 nfree; if (!ndd) return -ENXIO; + dev = ndd->dev; nvdimm_bus_lock(dev); nfree = nd_label_nfree(ndd); if (nfree - 1 > nfree) { @@ -356,6 +356,18 @@ static ssize_t available_slots_show(struct device *dev, nvdimm_bus_unlock(dev); return rc; } + +static ssize_t available_slots_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t rc; + + nd_device_lock(dev); + rc = __available_slots_show(dev_get_drvdata(dev), buf); + nd_device_unlock(dev); + + return rc; +} static DEVICE_ATTR_RO(available_slots); __weak ssize_t security_show(struct device *dev, diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 6da67f4d641a..2403b71b601e 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1635,11 +1635,11 @@ static umode_t namespace_visible(struct kobject *kobj, return a->mode; } - if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr - || a == &dev_attr_holder.attr - || a == &dev_attr_holder_class.attr - || a == &dev_attr_force_raw.attr - || a == &dev_attr_mode.attr) + /* base is_namespace_io() attributes */ + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr || + a == &dev_attr_holder.attr || a == &dev_attr_holder_class.attr || + a == &dev_attr_force_raw.attr || a == &dev_attr_mode.attr || + a == &dev_attr_resource.attr) return a->mode; return 0; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 875076b0ea6c..f33bdae626ba 100644 --- 
a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -23,7 +23,6 @@ #include <linux/uio.h> #include <linux/dax.h> #include <linux/nd.h> -#include <linux/backing-dev.h> #include <linux/mm.h> #include <asm/cacheflush.h> #include "pmem.h" diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 81e6389b2042..6bad4d4dcdf0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3242,6 +3242,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ @@ -3259,6 +3261,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dc1f0f647189..aacf06f0b431 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -305,7 +305,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) length = cmd->pdu_len; cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; - cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE); + cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; sg = &cmd->req.sg[cmd->sg_idx]; @@ -318,6 +318,7 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) length -= iov_len; sg = sg_next(sg); iov++; + sg_offset = 0; } iov_iter_kvec(&cmd->recv_msg.msg_iter, 
READ, cmd->iov, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b9fecc25d213..790393d1e318 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1558,7 +1558,6 @@ int pci_save_state(struct pci_dev *dev) return i; pci_save_ltr_state(dev); - pci_save_aspm_l1ss_state(dev); pci_save_dpc_state(dev); pci_save_aer_state(dev); pci_save_ptm_state(dev); @@ -1665,7 +1664,6 @@ void pci_restore_state(struct pci_dev *dev) * LTR itself (in the PCIe capability). */ pci_restore_ltr_state(dev); - pci_restore_aspm_l1ss_state(dev); pci_restore_pcie_state(dev); pci_restore_pasid_state(dev); @@ -3353,11 +3351,6 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) if (error) pci_err(dev, "unable to allocate suspend buffer for LTR\n"); - error = pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_L1SS, - 2 * sizeof(u32)); - if (error) - pci_err(dev, "unable to allocate suspend buffer for ASPM-L1SS\n"); - pci_allocate_vc_save_buffers(dev); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 5c59365092fa..a7bdf0b1d45d 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -582,15 +582,11 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); -void pci_save_aspm_l1ss_state(struct pci_dev *dev); -void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } -static inline void pci_save_aspm_l1ss_state(struct pci_dev *dev) { } -static inline void pci_restore_aspm_l1ss_state(struct pci_dev *dev) { } #endif #ifdef CONFIG_PCIE_ECRC diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 
a08e7d6dc248..ac0557a305af 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -734,50 +734,6 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state) PCI_L1SS_CTL1_L1SS_MASK, val); } -void pci_save_aspm_l1ss_state(struct pci_dev *dev) -{ - int aspm_l1ss; - struct pci_cap_saved_state *save_state; - u32 *cap; - - if (!pci_is_pcie(dev)) - return; - - aspm_l1ss = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_L1SS); - if (!aspm_l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - pci_read_config_dword(dev, aspm_l1ss + PCI_L1SS_CTL1, cap++); - pci_read_config_dword(dev, aspm_l1ss + PCI_L1SS_CTL2, cap++); -} - -void pci_restore_aspm_l1ss_state(struct pci_dev *dev) -{ - int aspm_l1ss; - struct pci_cap_saved_state *save_state; - u32 *cap; - - if (!pci_is_pcie(dev)) - return; - - aspm_l1ss = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_L1SS); - if (!aspm_l1ss) - return; - - save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_L1SS); - if (!save_state) - return; - - cap = (u32 *)&save_state->cap.data[0]; - pci_write_config_dword(dev, aspm_l1ss + PCI_L1SS_CTL1, *cap++); - pci_write_config_dword(dev, aspm_l1ss + PCI_L1SS_CTL2, *cap++); -} - static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, diff --git a/drivers/platform/x86/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell-wmi-sysman/sysman.c index dc6dd531c996..cb81010ba1a2 100644 --- a/drivers/platform/x86/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell-wmi-sysman/sysman.c @@ -419,13 +419,17 @@ static int init_bios_attributes(int attr_type, const char *guid) return retval; /* need to use specific instance_id and guid combination to get right data */ obj = get_wmiobj_pointer(instance_id, guid); - if (!obj) + if (!obj || obj->type != ACPI_TYPE_PACKAGE) return -ENODEV; elements = obj->package.elements; 
mutex_lock(&wmi_priv.mutex); while (elements) { /* sanity checking */ + if (elements[ATTR_NAME].type != ACPI_TYPE_STRING) { + pr_debug("incorrect element type\n"); + goto nextobj; + } if (strlen(elements[ATTR_NAME].string.pointer) == 0) { pr_debug("empty attribute found\n"); goto nextobj; diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 18bf8aeb5f87..e94e59283ecb 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -32,6 +32,10 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C"); MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); +static int enable_tablet_mode_sw = -1; +module_param(enable_tablet_mode_sw, int, 0444); +MODULE_PARM_DESC(enable_tablet_mode_sw, "Enable SW_TABLET_MODE reporting (-1=auto, 0=no, 1=yes)"); + #define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" #define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" @@ -654,10 +658,12 @@ static int __init hp_wmi_input_setup(void) } /* Tablet mode */ - val = hp_wmi_hw_state(HPWMI_TABLET_MASK); - if (!(val < 0)) { - __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); - input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + if (enable_tablet_mode_sw > 0) { + val = hp_wmi_hw_state(HPWMI_TABLET_MASK); + if (val >= 0) { + __set_bit(SW_TABLET_MODE, hp_wmi_input_dev->swbit); + input_report_switch(hp_wmi_input_dev, SW_TABLET_MODE, val); + } } err = sparse_keymap_setup(hp_wmi_input_dev, hp_wmi_keymap, NULL); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 68a9ac6f2fe1..a701dae653c4 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -805,8 +805,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) spin_lock_irq(&rtc_lock); - /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ - if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { + /* Ensure that the RTC is accessible. Bit 6 must be 0! 
*/ + if ((CMOS_READ(RTC_VALID) & 0x40) != 0) { spin_unlock_irq(&rtc_lock); dev_warn(dev, "not accessible\n"); retval = -ENXIO; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index f83c13818af3..dcfaf09946ee 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -21,8 +21,8 @@ unsigned int mc146818_get_time(struct rtc_time *time) again: spin_lock_irqsave(&rtc_lock, flags); - /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ - if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { + /* Ensure that the RTC is accessible. Bit 6 must be 0! */ + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { spin_unlock_irqrestore(&rtc_lock, flags); memset(time, 0xff, sizeof(*time)); return 0; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 1cb82fa6a60e..39d147e251bf 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -559,6 +559,9 @@ __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return -ENODEV; } + if (!vport->phba->sli4_hba.nvmels_wq) + return -ENOMEM; + /* * there are two dma buf in the request, actually there is one and * the second one is just the start address + cmd size. 
diff --git a/drivers/soc/sunxi/sunxi_mbus.c b/drivers/soc/sunxi/sunxi_mbus.c index e9925c8487d7..d90e4a264b6f 100644 --- a/drivers/soc/sunxi/sunxi_mbus.c +++ b/drivers/soc/sunxi/sunxi_mbus.c @@ -23,12 +23,7 @@ static const char * const sunxi_mbus_devices[] = { "allwinner,sun7i-a20-display-engine", "allwinner,sun8i-a23-display-engine", "allwinner,sun8i-a33-display-engine", - "allwinner,sun8i-a83t-display-engine", - "allwinner,sun8i-h3-display-engine", - "allwinner,sun8i-r40-display-engine", - "allwinner,sun8i-v3s-display-engine", "allwinner,sun9i-a80-display-engine", - "allwinner,sun50i-a64-display-engine", /* * And now we have the regular devices connected to the MBUS diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c index 77f0051358f1..bf1468e5bccb 100644 --- a/drivers/soc/ti/omap_prm.c +++ b/drivers/soc/ti/omap_prm.c @@ -860,6 +860,7 @@ static int omap_prm_reset_init(struct platform_device *pdev, const struct omap_rst_map *map; struct ti_prm_platform_data *pdata = dev_get_platdata(&pdev->dev); char buf[32]; + u32 v; /* * Check if we have controllable resets. 
If either rstctrl is non-zero @@ -907,6 +908,16 @@ static int omap_prm_reset_init(struct platform_device *pdev, map++; } + /* Quirk handling to assert rst_map_012 bits on reset and avoid errors */ + if (prm->data->rstmap == rst_map_012) { + v = readl_relaxed(reset->prm->base + reset->prm->data->rstctrl); + if ((v & reset->mask) != reset->mask) { + dev_dbg(&pdev->dev, "Asserting all resets: %08x\n", v); + writel_relaxed(reset->mask, reset->prm->base + + reset->prm->data->rstctrl); + } + } + return devm_reset_controller_register(&pdev->dev, &reset->rcdev); } diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c index a5f988a9f948..b5442f979b4d 100644 --- a/drivers/thunderbolt/acpi.c +++ b/drivers/thunderbolt/acpi.c @@ -56,7 +56,7 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, * managed with the xHCI and the SuperSpeed hub so we create the * link from xHCI instead. */ - while (!dev_is_pci(dev)) + while (dev && !dev_is_pci(dev)) dev = dev->parent; if (!dev) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 134dc2005ce9..c9f6e9758288 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -1329,14 +1329,17 @@ static int usblp_set_protocol(struct usblp *usblp, int protocol) if (protocol < USBLP_FIRST_PROTOCOL || protocol > USBLP_LAST_PROTOCOL) return -EINVAL; - alts = usblp->protocol[protocol].alt_setting; - if (alts < 0) - return -EINVAL; - r = usb_set_interface(usblp->dev, usblp->ifnum, alts); - if (r < 0) { - printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", - alts, usblp->ifnum); - return r; + /* Don't unnecessarily set the interface if there's a single alt. 
*/ + if (usblp->intf->num_altsetting > 1) { + alts = usblp->protocol[protocol].alt_setting; + if (alts < 0) + return -EINVAL; + r = usb_set_interface(usblp->dev, usblp->ifnum, alts); + if (r < 0) { + printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", + alts, usblp->ifnum); + return r; + } } usblp->bidir = (usblp->protocol[protocol].epread != NULL); diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 0a0d11151cfb..ad4c94366dad 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1543,7 +1543,6 @@ static void dwc2_hsotg_complete_oursetup(struct usb_ep *ep, static struct dwc2_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, u32 windex) { - struct dwc2_hsotg_ep *ep; int dir = (windex & USB_DIR_IN) ? 1 : 0; int idx = windex & 0x7F; @@ -1553,12 +1552,7 @@ static struct dwc2_hsotg_ep *ep_from_windex(struct dwc2_hsotg *hsotg, if (idx > hsotg->num_of_eps) return NULL; - ep = index_to_ep(hsotg, idx, dir); - - if (idx && ep->dir_in != dir) - return NULL; - - return ep; + return index_to_ep(hsotg, idx, dir); } /** diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 841daec70b6e..3101f0dcf6ae 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1758,7 +1758,7 @@ static int dwc3_resume_common(struct dwc3 *dwc, pm_message_t msg) if (PMSG_IS_AUTO(msg)) break; - ret = dwc3_core_init(dwc); + ret = dwc3_core_init_for_resume(dwc); if (ret) return ret; diff --git a/drivers/usb/gadget/legacy/ether.c b/drivers/usb/gadget/legacy/ether.c index 30313b233680..99c7fc0d1d59 100644 --- a/drivers/usb/gadget/legacy/ether.c +++ b/drivers/usb/gadget/legacy/ether.c @@ -403,8 +403,10 @@ static int eth_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail1; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; diff --git 
a/drivers/usb/gadget/udc/aspeed-vhub/hub.c b/drivers/usb/gadget/udc/aspeed-vhub/hub.c index 6497185ec4e7..bfd8e77788e2 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/hub.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/hub.c @@ -999,8 +999,10 @@ static int ast_vhub_of_parse_str_desc(struct ast_vhub *vhub, str_array[offset].s = NULL; ret = ast_vhub_str_alloc_add(vhub, &lang_str); - if (ret) + if (ret) { + of_node_put(child); break; + } } return ret; diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 45c54d56ecbd..b45e5bf08997 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -200,6 +200,8 @@ static struct mu3h_sch_ep_info *create_sch_ep(struct usb_device *udev, sch_ep->sch_tt = tt; sch_ep->ep = ep; + INIT_LIST_HEAD(&sch_ep->endpoint); + INIT_LIST_HEAD(&sch_ep->tt_endpoint); return sch_ep; } @@ -373,6 +375,7 @@ static void update_bus_bw(struct mu3h_sch_bw_info *sch_bw, sch_ep->bw_budget_table[j]; } } + sch_ep->allocated = used; } static int check_sch_tt(struct usb_device *udev, @@ -541,6 +544,22 @@ static int check_sch_bw(struct usb_device *udev, return 0; } +static void destroy_sch_ep(struct usb_device *udev, + struct mu3h_sch_bw_info *sch_bw, struct mu3h_sch_ep_info *sch_ep) +{ + /* only release ep bw check passed by check_sch_bw() */ + if (sch_ep->allocated) + update_bus_bw(sch_bw, sch_ep, 0); + + list_del(&sch_ep->endpoint); + + if (sch_ep->sch_tt) { + list_del(&sch_ep->tt_endpoint); + drop_tt(udev); + } + kfree(sch_ep); +} + static bool need_bw_sch(struct usb_host_endpoint *ep, enum usb_device_speed speed, int has_tt) { @@ -583,6 +602,8 @@ int xhci_mtk_sch_init(struct xhci_hcd_mtk *mtk) mtk->sch_array = sch_array; + INIT_LIST_HEAD(&mtk->bw_ep_chk_list); + return 0; } EXPORT_SYMBOL_GPL(xhci_mtk_sch_init); @@ -601,19 +622,14 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct xhci_ep_ctx *ep_ctx; struct xhci_slot_ctx *slot_ctx; struct xhci_virt_device *virt_dev; - struct 
mu3h_sch_bw_info *sch_bw; struct mu3h_sch_ep_info *sch_ep; - struct mu3h_sch_bw_info *sch_array; unsigned int ep_index; - int bw_index; - int ret = 0; xhci = hcd_to_xhci(hcd); virt_dev = xhci->devs[udev->slot_id]; ep_index = xhci_get_endpoint_index(&ep->desc); slot_ctx = xhci_get_slot_ctx(xhci, virt_dev->in_ctx); ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index); - sch_array = mtk->sch_array; xhci_dbg(xhci, "%s() type:%d, speed:%d, mpkt:%d, dir:%d, ep:%p\n", __func__, usb_endpoint_type(&ep->desc), udev->speed, @@ -632,35 +648,13 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, return 0; } - bw_index = get_bw_index(xhci, udev, ep); - sch_bw = &sch_array[bw_index]; - sch_ep = create_sch_ep(udev, ep, ep_ctx); if (IS_ERR_OR_NULL(sch_ep)) return -ENOMEM; setup_sch_info(udev, ep_ctx, sch_ep); - ret = check_sch_bw(udev, sch_bw, sch_ep); - if (ret) { - xhci_err(xhci, "Not enough bandwidth!\n"); - if (is_fs_or_ls(udev->speed)) - drop_tt(udev); - - kfree(sch_ep); - return -ENOSPC; - } - - list_add_tail(&sch_ep->endpoint, &sch_bw->bw_ep_list); - - ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(sch_ep->pkts) - | EP_BCSCOUNT(sch_ep->cs_count) | EP_BBM(sch_ep->burst_mode)); - ep_ctx->reserved[1] |= cpu_to_le32(EP_BOFFSET(sch_ep->offset) - | EP_BREPEAT(sch_ep->repeat)); - - xhci_dbg(xhci, " PKTS:%x, CSCOUNT:%x, BM:%x, OFFSET:%x, REPEAT:%x\n", - sch_ep->pkts, sch_ep->cs_count, sch_ep->burst_mode, - sch_ep->offset, sch_ep->repeat); + list_add_tail(&sch_ep->endpoint, &mtk->bw_ep_chk_list); return 0; } @@ -675,7 +669,7 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct xhci_virt_device *virt_dev; struct mu3h_sch_bw_info *sch_array; struct mu3h_sch_bw_info *sch_bw; - struct mu3h_sch_ep_info *sch_ep; + struct mu3h_sch_ep_info *sch_ep, *tmp; int bw_index; xhci = hcd_to_xhci(hcd); @@ -694,17 +688,79 @@ void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, bw_index = get_bw_index(xhci, udev, ep); sch_bw = 
&sch_array[bw_index]; - list_for_each_entry(sch_ep, &sch_bw->bw_ep_list, endpoint) { + list_for_each_entry_safe(sch_ep, tmp, &sch_bw->bw_ep_list, endpoint) { if (sch_ep->ep == ep) { - update_bus_bw(sch_bw, sch_ep, 0); - list_del(&sch_ep->endpoint); - if (is_fs_or_ls(udev->speed)) { - list_del(&sch_ep->tt_endpoint); - drop_tt(udev); - } - kfree(sch_ep); + destroy_sch_ep(udev, sch_bw, sch_ep); break; } } } EXPORT_SYMBOL_GPL(xhci_mtk_drop_ep_quirk); + +int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct xhci_hcd_mtk *mtk = hcd_to_mtk(hcd); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_virt_device *virt_dev = xhci->devs[udev->slot_id]; + struct mu3h_sch_bw_info *sch_bw; + struct mu3h_sch_ep_info *sch_ep, *tmp; + int bw_index, ret; + + xhci_dbg(xhci, "%s() udev %s\n", __func__, dev_name(&udev->dev)); + + list_for_each_entry(sch_ep, &mtk->bw_ep_chk_list, endpoint) { + bw_index = get_bw_index(xhci, udev, sch_ep->ep); + sch_bw = &mtk->sch_array[bw_index]; + + ret = check_sch_bw(udev, sch_bw, sch_ep); + if (ret) { + xhci_err(xhci, "Not enough bandwidth!\n"); + return -ENOSPC; + } + } + + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_chk_list, endpoint) { + struct xhci_ep_ctx *ep_ctx; + struct usb_host_endpoint *ep = sch_ep->ep; + unsigned int ep_index = xhci_get_endpoint_index(&ep->desc); + + bw_index = get_bw_index(xhci, udev, ep); + sch_bw = &mtk->sch_array[bw_index]; + + list_move_tail(&sch_ep->endpoint, &sch_bw->bw_ep_list); + + ep_ctx = xhci_get_ep_ctx(xhci, virt_dev->in_ctx, ep_index); + ep_ctx->reserved[0] |= cpu_to_le32(EP_BPKTS(sch_ep->pkts) + | EP_BCSCOUNT(sch_ep->cs_count) + | EP_BBM(sch_ep->burst_mode)); + ep_ctx->reserved[1] |= cpu_to_le32(EP_BOFFSET(sch_ep->offset) + | EP_BREPEAT(sch_ep->repeat)); + + xhci_dbg(xhci, " PKTS:%x, CSCOUNT:%x, BM:%x, OFFSET:%x, REPEAT:%x\n", + sch_ep->pkts, sch_ep->cs_count, sch_ep->burst_mode, + sch_ep->offset, sch_ep->repeat); + } + + return xhci_check_bandwidth(hcd, udev); +} 
+EXPORT_SYMBOL_GPL(xhci_mtk_check_bandwidth); + +void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +{ + struct xhci_hcd_mtk *mtk = hcd_to_mtk(hcd); + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct mu3h_sch_bw_info *sch_bw; + struct mu3h_sch_ep_info *sch_ep, *tmp; + int bw_index; + + xhci_dbg(xhci, "%s() udev %s\n", __func__, dev_name(&udev->dev)); + + list_for_each_entry_safe(sch_ep, tmp, &mtk->bw_ep_chk_list, endpoint) { + bw_index = get_bw_index(xhci, udev, sch_ep->ep); + sch_bw = &mtk->sch_array[bw_index]; + destroy_sch_ep(udev, sch_bw, sch_ep); + } + + xhci_reset_bandwidth(hcd, udev); +} +EXPORT_SYMBOL_GPL(xhci_mtk_reset_bandwidth); diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c index 8f321f39ab96..fe010cc61f19 100644 --- a/drivers/usb/host/xhci-mtk.c +++ b/drivers/usb/host/xhci-mtk.c @@ -347,6 +347,8 @@ static void usb_wakeup_set(struct xhci_hcd_mtk *mtk, bool enable) static int xhci_mtk_setup(struct usb_hcd *hcd); static const struct xhci_driver_overrides xhci_mtk_overrides __initconst = { .reset = xhci_mtk_setup, + .check_bandwidth = xhci_mtk_check_bandwidth, + .reset_bandwidth = xhci_mtk_reset_bandwidth, }; static struct hc_driver __read_mostly xhci_mtk_hc_driver; diff --git a/drivers/usb/host/xhci-mtk.h b/drivers/usb/host/xhci-mtk.h index a93cfe817904..cbb09dfea62e 100644 --- a/drivers/usb/host/xhci-mtk.h +++ b/drivers/usb/host/xhci-mtk.h @@ -59,6 +59,7 @@ struct mu3h_sch_bw_info { * @ep_type: endpoint type * @maxpkt: max packet size of endpoint * @ep: address of usb_host_endpoint struct + * @allocated: the bandwidth is aready allocated from bus_bw * @offset: which uframe of the interval that transfer should be * scheduled first time within the interval * @repeat: the time gap between two uframes that transfers are @@ -86,6 +87,7 @@ struct mu3h_sch_ep_info { u32 ep_type; u32 maxpkt; void *ep; + bool allocated; /* * mtk xHCI scheduling information put into reserved DWs * in ep context @@ -131,6 +133,7 @@ 
struct xhci_hcd_mtk { struct device *dev; struct usb_hcd *hcd; struct mu3h_sch_bw_info *sch_array; + struct list_head bw_ep_chk_list; struct mu3c_ippc_regs __iomem *ippc_regs; bool has_ippc; int num_u2_ports; @@ -166,6 +169,8 @@ int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); +int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); +void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); #else static inline int xhci_mtk_add_ep_quirk(struct usb_hcd *hcd, @@ -179,6 +184,16 @@ static inline void xhci_mtk_drop_ep_quirk(struct usb_hcd *hcd, { } +static inline int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, + struct usb_device *udev) +{ + return 0; +} + +static inline void xhci_mtk_reset_bandwidth(struct usb_hcd *hcd, + struct usb_device *udev) +{ +} #endif #endif /* _XHCI_MTK_H_ */ diff --git a/drivers/usb/host/xhci-mvebu.c b/drivers/usb/host/xhci-mvebu.c index 60651a50770f..8ca1a235d164 100644 --- a/drivers/usb/host/xhci-mvebu.c +++ b/drivers/usb/host/xhci-mvebu.c @@ -8,6 +8,7 @@ #include <linux/mbus.h> #include <linux/of.h> #include <linux/platform_device.h> +#include <linux/phy/phy.h> #include <linux/usb.h> #include <linux/usb/hcd.h> @@ -74,6 +75,47 @@ int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) return 0; } +int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct device *dev = hcd->self.controller; + struct phy *phy; + int ret; + + /* Old bindings miss the PHY handle */ + phy = of_phy_get(dev->of_node, "usb3-phy"); + if (IS_ERR(phy) && PTR_ERR(phy) == -EPROBE_DEFER) + return -EPROBE_DEFER; + else if (IS_ERR(phy)) + goto phy_out; + + ret = phy_init(phy); + if (ret) + goto phy_put; + + ret = phy_set_mode(phy, PHY_MODE_USB_HOST_SS); + if (ret) + goto phy_exit; + + ret = phy_power_on(phy); + if (ret == 
-EOPNOTSUPP) { + /* Skip initializatin of XHCI PHY when it is unsupported by firmware */ + dev_warn(dev, "PHY unsupported by firmware\n"); + xhci->quirks |= XHCI_SKIP_PHY_INIT; + } + if (ret) + goto phy_exit; + + phy_power_off(phy); +phy_exit: + phy_exit(phy); +phy_put: + of_phy_put(phy); +phy_out: + + return 0; +} + int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); diff --git a/drivers/usb/host/xhci-mvebu.h b/drivers/usb/host/xhci-mvebu.h index 3be021793cc8..01bf3fcb3eca 100644 --- a/drivers/usb/host/xhci-mvebu.h +++ b/drivers/usb/host/xhci-mvebu.h @@ -12,6 +12,7 @@ struct usb_hcd; #if IS_ENABLED(CONFIG_USB_XHCI_MVEBU) int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd); +int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd); int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd); #else static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) @@ -19,6 +20,11 @@ static inline int xhci_mvebu_mbus_init_quirk(struct usb_hcd *hcd) return 0; } +static inline int xhci_mvebu_a3700_plat_setup(struct usb_hcd *hcd) +{ + return 0; +} + static inline int xhci_mvebu_a3700_init_quirk(struct usb_hcd *hcd) { return 0; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 4d34f6005381..c1edcc9b13ce 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -44,6 +44,16 @@ static void xhci_priv_plat_start(struct usb_hcd *hcd) priv->plat_start(hcd); } +static int xhci_priv_plat_setup(struct usb_hcd *hcd) +{ + struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); + + if (!priv->plat_setup) + return 0; + + return priv->plat_setup(hcd); +} + static int xhci_priv_init_quirk(struct usb_hcd *hcd) { struct xhci_plat_priv *priv = hcd_to_xhci_priv(hcd); @@ -111,6 +121,7 @@ static const struct xhci_plat_priv xhci_plat_marvell_armada = { }; static const struct xhci_plat_priv xhci_plat_marvell_armada3700 = { + .plat_setup = xhci_mvebu_a3700_plat_setup, .init_quirk = xhci_mvebu_a3700_init_quirk, }; 
@@ -330,7 +341,14 @@ static int xhci_plat_probe(struct platform_device *pdev) hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node); xhci->shared_hcd->tpl_support = hcd->tpl_support; - if (priv && (priv->quirks & XHCI_SKIP_PHY_INIT)) + + if (priv) { + ret = xhci_priv_plat_setup(hcd); + if (ret) + goto disable_usb_phy; + } + + if ((xhci->quirks & XHCI_SKIP_PHY_INIT) || (priv && (priv->quirks & XHCI_SKIP_PHY_INIT))) hcd->skip_phy_initialization = 1; if (priv && (priv->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK)) diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h index 1fb149d1fbce..561d0b7bce09 100644 --- a/drivers/usb/host/xhci-plat.h +++ b/drivers/usb/host/xhci-plat.h @@ -13,6 +13,7 @@ struct xhci_plat_priv { const char *firmware_name; unsigned long long quirks; + int (*plat_setup)(struct usb_hcd *); void (*plat_start)(struct usb_hcd *); int (*init_quirk)(struct usb_hcd *); int (*suspend_quirk)(struct usb_hcd *); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index cf0c93a90200..89c3be9917f6 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -699,11 +699,16 @@ static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci, dma_unmap_single(dev, seg->bounce_dma, ring->bounce_buf_len, DMA_FROM_DEVICE); /* for in tranfers we need to copy the data from bounce to sg */ - len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs, seg->bounce_buf, - seg->bounce_len, seg->bounce_offs); - if (len != seg->bounce_len) - xhci_warn(xhci, "WARN Wrong bounce buffer read length: %zu != %d\n", - len, seg->bounce_len); + if (urb->num_sgs) { + len = sg_pcopy_from_buffer(urb->sg, urb->num_sgs, seg->bounce_buf, + seg->bounce_len, seg->bounce_offs); + if (len != seg->bounce_len) + xhci_warn(xhci, "WARN Wrong bounce buffer read length: %zu != %d\n", + len, seg->bounce_len); + } else { + memcpy(urb->transfer_buffer + seg->bounce_offs, seg->bounce_buf, + seg->bounce_len); + } seg->bounce_len = 0; seg->bounce_offs = 0; } @@ 
-3277,12 +3282,16 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, /* create a max max_pkt sized bounce buffer pointed to by last trb */ if (usb_urb_dir_out(urb)) { - len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs, - seg->bounce_buf, new_buff_len, enqd_len); - if (len != new_buff_len) - xhci_warn(xhci, - "WARN Wrong bounce buffer write length: %zu != %d\n", - len, new_buff_len); + if (urb->num_sgs) { + len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs, + seg->bounce_buf, new_buff_len, enqd_len); + if (len != new_buff_len) + xhci_warn(xhci, "WARN Wrong bounce buffer write length: %zu != %d\n", + len, new_buff_len); + } else { + memcpy(seg->bounce_buf, urb->transfer_buffer + enqd_len, new_buff_len); + } + seg->bounce_dma = dma_map_single(dev, seg->bounce_buf, max_pkt, DMA_TO_DEVICE); } else { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index e86940571b4c..345a221028c6 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2985,7 +2985,7 @@ static void xhci_check_bw_drop_ep_streams(struct xhci_hcd *xhci, * else should be touching the xhci->devs[slot_id] structure, so we * don't need to take the xhci->lock for manipulating that. 
*/ -static int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { int i; int ret = 0; @@ -3083,7 +3083,7 @@ command_cleanup: return ret; } -static void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) +void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) { struct xhci_hcd *xhci; struct xhci_virt_device *virt_dev; @@ -5510,6 +5510,10 @@ void xhci_init_driver(struct hc_driver *drv, drv->reset = over->reset; if (over->start) drv->start = over->start; + if (over->check_bandwidth) + drv->check_bandwidth = over->check_bandwidth; + if (over->reset_bandwidth) + drv->reset_bandwidth = over->reset_bandwidth; } } EXPORT_SYMBOL_GPL(xhci_init_driver); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 25e57bc9c3cc..07ff95016f11 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1920,6 +1920,8 @@ struct xhci_driver_overrides { size_t extra_priv_size; int (*reset)(struct usb_hcd *hcd); int (*start)(struct usb_hcd *hcd); + int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); + void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); }; #define XHCI_CFC_DELAY 10 @@ -2074,6 +2076,8 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); void xhci_shutdown(struct usb_hcd *hcd); void xhci_init_driver(struct hc_driver *drv, const struct xhci_driver_overrides *over); +int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); +void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index ac9a81ae8216..e6fa13701808 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -126,6 +126,7 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, 
struct usbhs_pkt *pkt) } usbhs_pipe_clear_without_sequence(pipe, 0, 0); + usbhs_pipe_running(pipe, 0); __usbhsf_pkt_del(pkt); } diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index fbb10dfc56e3..7bec1e730b20 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -61,6 +61,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ + { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ { USB_DEVICE(0x0BED, 0x1101) }, /* MEI series 2000 Combo Acceptor */ @@ -201,6 +202,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ + { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3fe959104311..2049e66f34a3 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -425,6 +425,8 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_AHXX_2RMNET 0x0084 #define CINTERION_PRODUCT_AHXX_AUDIO 0x0085 #define CINTERION_PRODUCT_CLS8 0x00b0 +#define CINTERION_PRODUCT_MV31_MBIM 0x00b3 +#define CINTERION_PRODUCT_MV31_RMNET 0x00b7 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1914,6 +1916,10 @@ static const struct 
usb_device_id option_ids[] = { { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, /* HC28 enumerates with Siemens or Cinterion VID depending on FW revision */ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_MBIM, 0xff), + .driver_info = RSVD(3)}, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff), + .driver_info = RSVD(0)}, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), .driver_info = RSVD(4) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 88dde3455bfd..b5fe6d2ad22f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -87,6 +87,7 @@ struct mlx5_vq_restore_info { u64 device_addr; u64 driver_addr; u16 avail_index; + u16 used_index; bool ready; struct vdpa_callback cb; bool restore; @@ -121,6 +122,7 @@ struct mlx5_vdpa_virtqueue { u32 virtq_id; struct mlx5_vdpa_net *ndev; u16 avail_idx; + u16 used_idx; int fw_state; /* keep last in the struct */ @@ -804,6 +806,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, get_features_12_3(ndev->mvdev.actual_features)); vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); @@ -1022,6 +1025,7 @@ static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m struct mlx5_virtq_attr { u8 state; u16 available_index; + u16 used_index; }; static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct 
mlx5_vdpa_virtqueue *mvq, @@ -1052,6 +1056,7 @@ static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueu memset(attr, 0, sizeof(*attr)); attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); kfree(out); return 0; @@ -1535,6 +1540,16 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) } } +static void clear_virtqueues(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { + ndev->vqs[i].avail_idx = 0; + ndev->vqs[i].used_idx = 0; + } +} + /* TODO: cross-endian support */ static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) { @@ -1610,6 +1625,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu return err; ri->avail_index = attr.available_index; + ri->used_index = attr.used_index; ri->ready = mvq->ready; ri->num_ent = mvq->num_ent; ri->desc_addr = mvq->desc_addr; @@ -1654,6 +1670,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) continue; mvq->avail_idx = ri->avail_index; + mvq->used_idx = ri->used_index; mvq->ready = ri->ready; mvq->num_ent = ri->num_ent; mvq->desc_addr = ri->desc_addr; @@ -1768,6 +1785,7 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if (!status) { mlx5_vdpa_info(mvdev, "performing device reset\n"); teardown_driver(ndev); + clear_virtqueues(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; ndev->mvdev.mlx_features = 0; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e850f79351cb..b249f2d6b0cc 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -63,6 +63,7 @@ #include <xen/interface/physdev.h> #include <xen/interface/sched.h> #include <xen/interface/vcpu.h> +#include <xen/xenbus.h> #include <asm/hw_irq.h> #include 
"events_internal.h" @@ -115,6 +116,7 @@ struct irq_info { unsigned char flags; uint16_t domid; } pirq; + struct xenbus_device *interdomain; } u; }; @@ -313,11 +315,16 @@ static int xen_irq_info_common_setup(struct irq_info *info, } static int xen_irq_info_evtchn_setup(unsigned irq, - evtchn_port_t evtchn) + evtchn_port_t evtchn, + struct xenbus_device *dev) { struct irq_info *info = info_for_irq(irq); + int ret; - return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); + ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); + info->u.interdomain = dev; + + return ret; } static int xen_irq_info_ipi_setup(unsigned cpu, @@ -1116,7 +1123,8 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, + struct xenbus_device *dev) { int irq; int ret; @@ -1136,7 +1144,7 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); - ret = xen_irq_info_evtchn_setup(irq, evtchn); + ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); if (ret < 0) { __unbind_from_irq(irq); irq = ret; @@ -1163,7 +1171,7 @@ out: int bind_evtchn_to_irq(evtchn_port_t evtchn) { - return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL); } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); @@ -1212,27 +1220,27 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, +static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, evtchn_port_t remote_port, struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; - bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_dom = dev->otherend_id; 
bind_interdomain.remote_port = remote_port; err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, - chip); + chip, dev); } -int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, +int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, evtchn_port_t remote_port) { - return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + return bind_interdomain_evtchn_to_irq_chip(dev, remote_port, &xen_lateeoi_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); @@ -1345,7 +1353,7 @@ static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, { int irq, retval; - irq = bind_evtchn_to_irq_chip(evtchn, chip); + irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1380,14 +1388,13 @@ int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); static int bind_interdomain_evtchn_to_irqhandler_chip( - unsigned int remote_domain, evtchn_port_t remote_port, + struct xenbus_device *dev, evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id, struct irq_chip *chip) { int irq, retval; - irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, - chip); + irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip); if (irq < 0) return irq; @@ -1400,14 +1407,14 @@ static int bind_interdomain_evtchn_to_irqhandler_chip( return irq; } -int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, +int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev, evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + return bind_interdomain_evtchn_to_irqhandler_chip(dev, 
remote_port, handler, irqflags, devname, dev_id, &xen_lateeoi_chip); } @@ -1679,7 +1686,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) so there should be a proper type */ BUG_ON(info->type == IRQT_UNBOUND); - (void)xen_irq_info_evtchn_setup(irq, evtchn); + (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); mutex_unlock(&irq_mapping_update_lock); diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index a7d293fa8d14..b47fd8435061 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -348,7 +348,7 @@ static struct sock_mapping *pvcalls_new_active_socket( map->bytes = page; ret = bind_interdomain_evtchn_to_irqhandler_lateeoi( - fedata->dev->otherend_id, evtchn, + fedata->dev, evtchn, pvcalls_back_conn_event, 0, "pvcalls-backend", map); if (ret < 0) goto out; @@ -948,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev) goto error; } - err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn); if (err < 0) goto error; fedata->irq = err; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index e7c692cfb2cf..5188f02e75fb 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -124,7 +124,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, pdev->sh_info = vaddr; err = bind_interdomain_evtchn_to_irqhandler_lateeoi( - pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, + pdev->xdev, remote_evtchn, xen_pcibk_handle_event, 0, DRV_NAME, pdev); if (err < 0) { xenbus_dev_fatal(pdev->xdev, err, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 862162dca33c..8b59897b2df9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -799,7 +799,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, sring = (struct vscsiif_sring *)area; BACK_RING_INIT(&info->ring, sring, 
PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(info->dev, evtchn); if (err < 0) goto unmap_page; diff --git a/fs/Kconfig b/fs/Kconfig index aa4c12282301..da524c4d7b7e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -203,7 +203,7 @@ config TMPFS_XATTR config TMPFS_INODE64 bool "Use 64-bit ino_t by default in tmpfs" - depends on TMPFS && 64BIT + depends on TMPFS && 64BIT && !(S390 || ALPHA) default n help tmpfs has historically used only inode numbers as wide as an unsigned diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 68900f1629bf..97ac363b5df1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -737,6 +737,7 @@ static int cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { struct inode *inode; + int rc; if (flags & LOOKUP_RCU) return -ECHILD; @@ -746,8 +747,25 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode))) CIFS_I(inode)->time = 0; /* force reval */ - if (cifs_revalidate_dentry(direntry)) - return 0; + rc = cifs_revalidate_dentry(direntry); + if (rc) { + cifs_dbg(FYI, "cifs_revalidate_dentry failed with rc=%d", rc); + switch (rc) { + case -ENOENT: + case -ESTALE: + /* + * Those errors mean the dentry is invalid + * (file was deleted or recreated) + */ + return 0; + default: + /* + * Otherwise some unexpected error happened + * report it as-is to VFS layer + */ + return rc; + } + } else { /* * If the inode wasn't known to be a dfs entry when diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index d85edf5d1429..a5a9e33c0d73 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -286,7 +286,7 @@ struct smb2_negotiate_req { __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */ __le16 NegotiateContextCount; /* SMB3.1.1 only. 
MBZ earlier */ __le16 Reserved2; - __le16 Dialects[1]; /* One dialect (vers=) at a time for now */ + __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */ } __packed; /* Dialects */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 95ef26b555b9..4a2b836eb017 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -666,10 +666,22 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num, if (*credits < num) { /* - * Return immediately if not too many requests in flight since - * we will likely be stuck on waiting for credits. + * If the server is tight on resources or just gives us less + * credits for other reasons (e.g. requests are coming out of + * order and the server delays granting more credits until it + * processes a missing mid) and we exhausted most available + * credits there may be situations when we try to send + * a compound request but we don't have enough credits. At this + * point the client needs to decide if it should wait for + * additional credits or fail the request. If at least one + * request is in flight there is a high probability that the + * server will return enough credits to satisfy this compound + * request. + * + * Return immediately if no requests in flight since we will be + * stuck on waiting for credits. 
*/ - if (server->in_flight < num - *credits) { + if (server->in_flight == 0) { spin_unlock(&server->req_lock); trace_smb3_insufficient_credits(server->CurrentMid, server->hostname, scredits, sin_flight); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b5c109703daa..21c20fd5f9ee 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_unlock(&hugetlb_fault_mutex_table[hash]); + set_page_huge_active(page); /* * unlock_page because locked by add_to_page_cache() - * page_put due to reference from alloc_huge_page() + * put_page() due to reference from alloc_huge_page() */ unlock_page(page); put_page(page); diff --git a/fs/io_uring.c b/fs/io_uring.c index 38c6cbe1ab38..1f68105a41ed 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2205,6 +2205,9 @@ static void __io_req_task_submit(struct io_kiocb *req) else __io_req_task_cancel(req, -EFAULT); mutex_unlock(&ctx->uring_lock); + + if (ctx->flags & IORING_SETUP_SQPOLL) + io_sq_thread_drop_mm_files(); } static void io_req_task_submit(struct callback_head *cb) @@ -8982,12 +8985,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); - /* - * If the files that are going away are the ones in the thread - * identity, clear them out. 
- */ - if (task->io_uring->identity->files == files) - task->io_uring->identity->files = NULL; io_sq_thread_unpark(ctx->sq_data); } } diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 64bc81363c6c..e1bd592ce700 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -141,6 +141,7 @@ const struct file_operations nilfs_file_operations = { /* .release = nilfs_release_file, */ .fsync = nilfs_sync_file, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; const struct inode_operations nilfs_file_inode_operations = { diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e5b616c93e11..0fed532efa68 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -84,6 +84,14 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, if (ovl_is_private_xattr(sb, name)) continue; + + error = security_inode_copy_up_xattr(name); + if (error < 0 && error != -EOPNOTSUPP) + break; + if (error == 1) { + error = 0; + continue; /* Discard */ + } retry: size = vfs_getxattr(old, name, value, value_size); if (size == -ERANGE) @@ -107,13 +115,6 @@ retry: goto retry; } - error = security_inode_copy_up_xattr(name); - if (error < 0 && error != -EOPNOTSUPP) - break; - if (error == 1) { - error = 0; - continue; /* Discard */ - } error = vfs_setxattr(new, name, value, size, 0); if (error) { if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name)) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 28a075b5f5b2..d1efa3a5a503 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -992,8 +992,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect) buflen -= thislen; memcpy(&buf[buflen], name, thislen); - tmp = dget_dlock(d->d_parent); spin_unlock(&d->d_lock); + tmp = dget_parent(d); dput(d); d = tmp; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index bd9dd38347ae..077d3ad343f6 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -398,8 +398,9 @@ static int ovl_fsync(struct file *file, 
loff_t start, loff_t end, int datasync) const struct cred *old_cred; int ret; - if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb))) - return 0; + ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb)); + if (ret <= 0) + return ret; ret = ovl_real_fdget_meta(file, &real, !datasync); if (ret) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index d739e14c6814..cf41bcb664bc 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -352,7 +352,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, goto out; if (!value && !upperdentry) { + old_cred = ovl_override_creds(dentry->d_sb); err = vfs_getxattr(realdentry, name, NULL, 0); + revert_creds(old_cred); if (err < 0) goto out_drop_write; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index b487e48c7fd4..cb4e2d60ecf9 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -324,6 +324,7 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry); bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry, int padding); +int ovl_sync_status(struct ovl_fs *ofs); static inline bool ovl_is_impuredir(struct super_block *sb, struct dentry *dentry) diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index fbd5e27ce66b..63efee554f69 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -81,6 +81,8 @@ struct ovl_fs { atomic_long_t last_ino; /* Whiteout dentry cache */ struct dentry *whiteout; + /* r/o snapshot of upperdir sb's only taken on volatile mounts */ + errseq_t errseq; }; static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 01620ebae1bd..f404a78e6b60 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -865,7 +865,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper) struct ovl_dir_file *od = file->private_data; 
struct dentry *dentry = file->f_path.dentry; - struct file *realfile = od->realfile; + struct file *old, *realfile = od->realfile; if (!OVL_TYPE_UPPER(ovl_path_type(dentry))) return want_upper ? NULL : realfile; @@ -874,29 +874,20 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper) * Need to check if we started out being a lower dir, but got copied up */ if (!od->is_upper) { - struct inode *inode = file_inode(file); - realfile = READ_ONCE(od->upperfile); if (!realfile) { struct path upperpath; ovl_path_upper(dentry, &upperpath); realfile = ovl_dir_open_realfile(file, &upperpath); + if (IS_ERR(realfile)) + return realfile; - inode_lock(inode); - if (!od->upperfile) { - if (IS_ERR(realfile)) { - inode_unlock(inode); - return realfile; - } - smp_store_release(&od->upperfile, realfile); - } else { - /* somebody has beaten us to it */ - if (!IS_ERR(realfile)) - fput(realfile); - realfile = od->upperfile; + old = cmpxchg_release(&od->upperfile, NULL, realfile); + if (old) { + fput(realfile); + realfile = old; } - inode_unlock(inode); } } @@ -909,8 +900,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, struct file *realfile; int err; - if (!ovl_should_sync(OVL_FS(file->f_path.dentry->d_sb))) - return 0; + err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb)); + if (err <= 0) + return err; realfile = ovl_dir_real_file(file, true); err = PTR_ERR_OR_ZERO(realfile); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2bd570cbe8a4..d58b8f2bf9d0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -264,11 +264,20 @@ static int ovl_sync_fs(struct super_block *sb, int wait) struct super_block *upper_sb; int ret; - if (!ovl_upper_mnt(ofs)) - return 0; + ret = ovl_sync_status(ofs); + /* + * We have to always set the err, because the return value isn't + * checked in syncfs, and instead indirectly return an error via + * the sb's writeback errseq, which VFS inspects after this call. 
+ */ + if (ret < 0) { + errseq_set(&sb->s_wb_err, -EIO); + return -EIO; + } + + if (!ret) + return ret; - if (!ovl_should_sync(ofs)) - return 0; /* * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). * All the super blocks will be iterated, including upper_sb. @@ -1923,6 +1932,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int numlower; int err; + err = -EIO; + if (WARN_ON(sb->s_user_ns != current_user_ns())) + goto out; + sb->s_d_op = &ovl_dentry_operations; err = -ENOMEM; @@ -1989,6 +2002,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; if (ofs->config.upperdir) { + struct super_block *upper_sb; + if (!ofs->config.workdir) { pr_err("missing 'workdir'\n"); goto out_err; @@ -1998,6 +2013,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_err; + upper_sb = ovl_upper_mnt(ofs)->mnt_sb; + if (!ovl_should_sync(ofs)) { + ofs->errseq = errseq_sample(&upper_sb->s_wb_err); + if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { + err = -EIO; + pr_err("Cannot mount volatile when upperdir has an unseen error. 
Sync upperdir fs to clear state.\n"); + goto out_err; + } + } + err = ovl_get_workdir(sb, ofs, &upperpath); if (err) goto out_err; @@ -2005,9 +2030,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ofs->workdir) sb->s_flags |= SB_RDONLY; - sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth; - sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran; - + sb->s_stack_depth = upper_sb->s_stack_depth; + sb->s_time_gran = upper_sb->s_time_gran; } oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers); err = PTR_ERR(oe); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 6569031af3cd..9826b003f1d2 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -962,3 +962,30 @@ err_free: kfree(buf); return ERR_PTR(res); } + +/* + * ovl_sync_status() - Check fs sync status for volatile mounts + * + * Returns 1 if this is not a volatile mount and a real sync is required. + * + * Returns 0 if syncing can be skipped because mount is volatile, and no errors + * have occurred on the upperdir since the mount. + * + * Returns -errno if it is a volatile mount, and the error that occurred since + * the last mount. If the error code changes, it'll return the latest error + * code. + */ + +int ovl_sync_status(struct ovl_fs *ofs) +{ + struct vfsmount *mnt; + + if (ovl_should_sync(ofs)) + return 1; + + mnt = ovl_upper_mnt(ofs); + if (!mnt) + return 0; + + return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq); +} diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 8a19773b5a0b..45f44425d856 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -196,9 +196,15 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, length = SQUASHFS_COMPRESSED_SIZE(length); index += 2; - TRACE("Block @ 0x%llx, %scompressed size %d\n", index, + TRACE("Block @ 0x%llx, %scompressed size %d\n", index - 2, compressed ? 
"" : "un", length); } + if (length < 0 || length > output->length || + (index + length) > msblk->bytes_used) { + res = -EIO; + goto out; + } + if (next_index) *next_index = index + length; diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index ae2c87bb0fbe..eb02072d28dd 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -41,12 +41,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num) struct squashfs_sb_info *msblk = sb->s_fs_info; int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1); int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1); - u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]); + u64 start; __le64 ino; int err; TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num); + if (ino_num == 0 || (ino_num - 1) >= msblk->inodes) + return -EINVAL; + + start = le64_to_cpu(msblk->inode_lookup_table[blk]); + err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino)); if (err < 0) return err; @@ -111,7 +116,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, u64 lookup_table_start, u64 next_table, unsigned int inodes) { unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); + unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes); + int n; __le64 *table; + u64 start, end; TRACE("In read_inode_lookup_table, length %d\n", length); @@ -121,20 +129,37 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, if (inodes == 0) return ERR_PTR(-EINVAL); - /* length bytes should not extend into the next table - this check - * also traps instances where lookup_table_start is incorrectly larger - * than the next table start + /* + * The computed size of the lookup table (length bytes) should exactly + * match the table start and end points */ - if (lookup_table_start + length > next_table) + if (length != (next_table - lookup_table_start)) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, lookup_table_start, length); + if (IS_ERR(table)) + return table; /* - * table[0] 
points to the first inode lookup table metadata block, - * this should be less than lookup_table_start + * table0], table[1], ... table[indexes - 1] store the locations + * of the compressed inode lookup blocks. Each entry should be + * less than the next (i.e. table[0] < table[1]), and the difference + * between them should be SQUASHFS_METADATA_SIZE or less. + * table[indexes - 1] should be less than lookup_table_start, and + * again the difference should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index 6be5afe7287d..11581bf31af4 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -35,10 +35,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_ID_BLOCK(index); int offset = SQUASHFS_ID_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->id_table[block]); + u64 start_block; __le32 disk_id; int err; + if (index >= msblk->ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->id_table[block]); + err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset, sizeof(disk_id)); if (err < 0) @@ -56,7 +61,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, u64 id_table_start, u64 next_table, unsigned short no_ids) { unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); + unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids); + int n; __le64 *table; + u64 start, end; TRACE("In read_id_index_table, length %d\n", length); @@ -67,20 +75,36 @@ __le64 
*squashfs_read_id_index_table(struct super_block *sb, return ERR_PTR(-EINVAL); /* - * length bytes should not extend into the next table - this check - * also traps instances where id_table_start is incorrectly larger - * than the next table start + * The computed size of the index table (length bytes) should exactly + * match the table start and end points */ - if (id_table_start + length > next_table) + if (length != (next_table - id_table_start)) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, id_table_start, length); + if (IS_ERR(table)) + return table; /* - * table[0] points to the first id lookup table metadata block, this - * should be less than id_table_start + * table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] + * should be less than id_table_start, and again the difference + * should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 34c21ffb6df3..166e98806265 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -64,5 +64,6 @@ struct squashfs_sb_info { unsigned int inodes; unsigned int fragments; int xattr_ids; + unsigned int ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index d6c6593ec169..88cc94be1076 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ 
-166,6 +166,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); msblk->fragments = le32_to_cpu(sblk->fragments); + msblk->ids = le16_to_cpu(sblk->no_ids); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -177,7 +178,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); TRACE("Number of fragments %d\n", msblk->fragments); - TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); + TRACE("Number of ids %d\n", msblk->ids); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); TRACE("sblk->fragment_table_start %llx\n", @@ -236,8 +237,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) allocate_id_index_table: /* Allocate and read id index table */ msblk->id_table = squashfs_read_id_index_table(sb, - le64_to_cpu(sblk->id_table_start), next_table, - le16_to_cpu(sblk->no_ids)); + le64_to_cpu(sblk->id_table_start), next_table, msblk->ids); if (IS_ERR(msblk->id_table)) { errorf(fc, "unable to read id index table"); err = PTR_ERR(msblk->id_table); diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index 184129afd456..d8a270d3ac4c 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -17,8 +17,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, u64 *xattr_table_start, int *xattr_ids) { + struct squashfs_xattr_id_table *id_table; + + id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + if (IS_ERR(id_table)) + return (__le64 *) id_table; + + *xattr_table_start = le64_to_cpu(id_table->xattr_table_start); + kfree(id_table); + ERROR("Xattrs 
in filesystem, these will be ignored\n"); - *xattr_table_start = start; return ERR_PTR(-ENOTSUPP); } diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index d99e08464554..ead66670b41a 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -31,10 +31,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_XATTR_BLOCK(index); int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]); + u64 start_block; struct squashfs_xattr_id id; int err; + if (index >= msblk->xattr_ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->xattr_id_table[block]); + err = squashfs_read_metadata(sb, &id, &start_block, &offset, sizeof(id)); if (err < 0) @@ -50,13 +55,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, /* * Read uncompressed xattr id lookup table indexes from disk into memory */ -__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, +__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, u64 *xattr_table_start, int *xattr_ids) { - unsigned int len; + struct squashfs_sb_info *msblk = sb->s_fs_info; + unsigned int len, indexes; struct squashfs_xattr_id_table *id_table; + __le64 *table; + u64 start, end; + int n; - id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + id_table = squashfs_read_table(sb, table_start, sizeof(*id_table)); if (IS_ERR(id_table)) return (__le64 *) id_table; @@ -70,13 +79,52 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, if (*xattr_ids == 0) return ERR_PTR(-EINVAL); - /* xattr_table should be less than start */ - if (*xattr_table_start >= start) + len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids); + + /* + * The computed size of the index table (len bytes) should exactly + * match the table start and end points + */ + start = table_start + 
sizeof(*id_table); + end = msblk->bytes_used; + + if (len != (end - start)) return ERR_PTR(-EINVAL); - len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + table = squashfs_read_table(sb, start, len); + if (IS_ERR(table)) + return table; + + /* table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed xattr id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] + * should be less than table_start, and again the difference + * shouls be SQUASHFS_METADATA_SIZE or less. + * + * Finally xattr_table_start should be less than table[0]. + */ + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } - TRACE("In read_xattr_index_table, length %d\n", len); + if (*xattr_table_start >= le64_to_cpu(table[0])) { + kfree(table); + return ERR_PTR(-EINVAL); + } - return squashfs_read_table(sb, start + sizeof(*id_table), len); + return table; } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b2b3d81b1535..b97c628ad91f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -459,7 +459,7 @@ } \ \ /* Built-in firmware blobs */ \ - .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) { \ + .builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) { \ __start_builtin_fw = .; \ KEEP(*(.builtin_fw)) \ __end_builtin_fw = .; \ diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index f5e92fe9151c..bd1c39907b92 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -783,6 +783,7 @@ 
drm_dp_mst_detect_port(struct drm_connector *connector, struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port); +int drm_dp_get_vc_payload_bw(int link_rate, int link_lane_count); int drm_dp_calc_pbn_mode(int clock, int bpp, bool dsc); diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ca86a00abe86..a104b298019a 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -46,6 +46,7 @@ SYSCALL_WORK_SYSCALL_TRACE | \ SYSCALL_WORK_SYSCALL_AUDIT | \ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \ + SYSCALL_WORK_SYSCALL_EXIT_TRAP | \ ARCH_SYSCALL_WORK_EXIT) /* diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ebca2ef02212..b5807f23caf8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif +void set_page_huge_active(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h new file mode 100644 index 000000000000..38bbc537d4e4 --- /dev/null +++ b/include/linux/if_hsr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IF_HSR_H_ +#define _LINUX_IF_HSR_H_ + +/* used to differentiate various protocols */ +enum hsr_version { + HSR_V0 = 0, + HSR_V1, + PRP_V1, +}; + +#if IS_ENABLED(CONFIG_HSR) +extern bool is_hsr_master(struct net_device *dev); +extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); +#else +static inline bool is_hsr_master(struct net_device *dev) +{ + return false; +} +static inline int hsr_get_version(struct net_device *dev, + enum hsr_version *ver) +{ + return -EINVAL; +} +#endif /* CONFIG_HSR */ + +#endif /*_LINUX_IF_HSR_H_*/ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b3f0e2018c62..efa96263b81b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -616,7 
+616,10 @@ static inline void dev_iommu_fwspec_set(struct device *dev, static inline void *dev_iommu_priv_get(struct device *dev) { - return dev->iommu->priv; + if (dev->iommu) + return dev->iommu->priv; + else + return NULL; } static inline void dev_iommu_priv_set(struct device *dev, void *priv) diff --git a/include/linux/irq.h b/include/linux/irq.h index 4aeb1c4c7e07..2efde6a79b7e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -928,7 +928,7 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ - irq_alloc_descs(-1, 0, 1, node) + irq_alloc_descs(-1, 1, 1, node) #define irq_alloc_desc_at(at, node) \ irq_alloc_descs(at, at, 1, node) @@ -943,7 +943,7 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from, __devm_irq_alloc_descs(dev, irq, from, cnt, node, THIS_MODULE, NULL) #define devm_irq_alloc_desc(dev, node) \ - devm_irq_alloc_descs(dev, -1, 0, 1, node) + devm_irq_alloc_descs(dev, -1, 1, 1, node) #define devm_irq_alloc_desc_at(dev, at, node) \ devm_irq_alloc_descs(dev, at, at, 1, node) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index fe1ae73ff8b5..0aea9e2a2a01 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -333,6 +333,13 @@ static inline void *kasan_reset_tag(const void *addr) return (void *)arch_kasan_reset_tag(addr); } +/** + * kasan_report - print a report about a bad memory access detected by KASAN + * @addr: address of the bad access + * @size: size of the bad access + * @is_write: whether the bad access is a write or a read + * @ip: instruction pointer for the accessibility check or the bad access itself + */ bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b3a36b0cfc81..1883a4a9f16a 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -266,7 +266,7 @@ 
extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); extern bool arch_kprobe_on_func_entry(unsigned long offset); -extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); +extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); extern int kprobe_add_ksym_blacklist(unsigned long entry); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 88197b87bd81..936302b2c141 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -588,7 +588,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; struct mlx5_adev **adev; diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index 29fd832950e0..994c2c8cb4fd 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -96,6 +96,35 @@ static inline u32 mlx5_eswitch_get_vport_metadata_mask(void) u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num); +u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, + u16 vport_num); + +/* Reg C1 usage: + * Reg C1 = < ESW_TUN_ID(12) | ESW_TUN_OPTS(12) | ESW_ZONE_ID(8) > + * + * Highest 12 bits of reg c1 is the encapsulation tunnel id, next 12 bits is + * encapsulation tunnel options, and the lowest 8 bits are used for zone id. + * + * Zone id is used to restore CT flow when packet misses on chain. + * + * Tunnel id and options are used together to restore the tunnel info metadata + * on miss and to support inner header rewrite by means of implicit chain 0 + * flows. 
+ */ +#define ESW_ZONE_ID_BITS 8 +#define ESW_TUN_OPTS_BITS 12 +#define ESW_TUN_ID_BITS 12 +#define ESW_TUN_OPTS_OFFSET ESW_ZONE_ID_BITS +#define ESW_TUN_OFFSET ESW_TUN_OPTS_OFFSET +#define ESW_ZONE_ID_MASK GENMASK(ESW_ZONE_ID_BITS - 1, 0) +#define ESW_TUN_OPTS_MASK GENMASK(32 - ESW_TUN_ID_BITS - 1, ESW_TUN_OPTS_OFFSET) +#define ESW_TUN_MASK GENMASK(31, ESW_TUN_OFFSET) +#define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */ +#define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT 0xFFF /* 0xFFF is a reserved mapping */ +#define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \ + ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT) +#define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK + u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 360a0a7e7341..aef35fd1cf11 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -178,6 +178,12 @@ struct msi_desc { list_for_each_entry((desc), dev_to_msi_list((dev)), list) #define for_each_msi_entry_safe(desc, tmp, dev) \ list_for_each_entry_safe((desc), (tmp), dev_to_msi_list((dev)), list) +#define for_each_msi_vector(desc, __irq, dev) \ + for_each_msi_entry((desc), (dev)) \ + if ((desc)->irq) \ + for (__irq = (desc)->irq; \ + __irq < ((desc)->irq + (desc)->nvec_used); \ + __irq++) #ifdef CONFIG_IRQ_MSI_IOMMU static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index c06d6aaba9df..3de38d6a0aea 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -86,6 +86,11 @@ enum { NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ + NETIF_F_HW_HSR_TAG_INS_BIT, /* Offload HSR tag insertion */ + NETIF_F_HW_HSR_TAG_RM_BIT, /* Offload HSR tag removal */ + NETIF_F_HW_HSR_FWD_BIT, /* Offload HSR forwarding */ + 
NETIF_F_HW_HSR_DUP_BIT, /* Offload HSR duplication */ + /* * Add your fresh new feature above and remember to update * netdev_features_strings[] in net/core/ethtool.c and maybe @@ -159,6 +164,10 @@ enum { #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) #define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) +#define NETIF_F_HW_HSR_TAG_INS __NETIF_F(HW_HSR_TAG_INS) +#define NETIF_F_HW_HSR_TAG_RM __NETIF_F(HW_HSR_TAG_RM) +#define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) +#define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) /* Finds the next feature with the highest number of the range of start till 0. */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e9e7ada07ea1..bfadf3b82f9c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -347,6 +347,7 @@ struct napi_struct { struct list_head dev_list; struct hlist_node napi_hash_node; unsigned int napi_id; + struct task_struct *thread; }; enum { @@ -358,6 +359,7 @@ enum { NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ }; enum { @@ -369,6 +371,7 @@ enum { NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), }; enum gro_result { @@ -494,6 +497,8 @@ static inline bool napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } +int dev_set_threaded(struct net_device *dev, bool threaded); + /** * napi_disable - prevent NAPI from scheduling * @n: NAPI context @@ -503,20 +508,7 @@ static inline bool napi_complete(struct napi_struct *n) */ void napi_disable(struct napi_struct *n); -/** - * napi_enable - enable NAPI scheduling - 
* @n: NAPI context - * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. - */ -static inline void napi_enable(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_atomic(); - clear_bit(NAPI_STATE_SCHED, &n->state); - clear_bit(NAPI_STATE_NPSVC, &n->state); -} +void napi_enable(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running @@ -1827,6 +1819,8 @@ enum netdev_priv_flags { * * @wol_enabled: Wake-on-LAN is enabled * + * @threaded: napi threaded mode is enabled + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2145,6 +2139,7 @@ struct net_device { struct lock_class_key *qdisc_running_key; bool proto_down; unsigned wol_enabled:1; + unsigned threaded:1; struct list_head net_notifier_list; @@ -3907,6 +3902,9 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); @@ -4341,6 +4339,7 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -4348,6 +4347,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); } diff --git a/include/linux/phy.h b/include/linux/phy.h index 
bc323fbdd21e..c130788306c8 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -503,6 +503,7 @@ struct macsec_ops; * * @speed: Current link speed * @duplex: Current duplex + * @port: Current port * @pause: Current pause * @asym_pause: Current asymmetric pause * @supported: Combined MAC/PHY supported linkmodes @@ -581,6 +582,7 @@ struct phy_device { */ int speed; int duplex; + int port; int pause; int asym_pause; u8 master_slave_get; diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index ae2279fe830a..28c04d918f0f 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -22,8 +22,16 @@ : [rs]"r" (ioaddr)); \ (result); \ }) +#define cn10k_lmt_flush(val, addr) \ +({ \ + __asm__ volatile(".cpu generic+lse\n" \ + "steor %x[rf],[%[rs]]" \ + : [rf]"+r"(val) \ + : [rs]"r"(addr)); \ +}) #else #define otx2_lmt_flush(ioaddr) ({ 0; }) +#define cn10k_lmt_flush(val, addr) ({ addr = val; }) #endif #endif /* __SOC_OTX2_ASM_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index c8a974cead73..9b2158c69275 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -43,6 +43,7 @@ enum syscall_work_bit { SYSCALL_WORK_BIT_SYSCALL_EMU, SYSCALL_WORK_BIT_SYSCALL_AUDIT, SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH, + SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP, }; #define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP) @@ -51,6 +52,7 @@ enum syscall_work_bit { #define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU) #define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT) #define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH) +#define SYSCALL_WORK_SYSCALL_EXIT_TRAP BIT(SYSCALL_WORK_BIT_SYSCALL_EXIT_TRAP) #endif #include <asm/thread_info.h> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 0f21617f1a66..966ed8980327 100644 --- a/include/linux/tracepoint.h +++ 
b/include/linux/tracepoint.h @@ -307,11 +307,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ - do { \ - it_func = (it_func_ptr)->func; \ - __data = (it_func_ptr)->data; \ - ((void(*)(void *, proto))(it_func))(__data, args); \ - } while ((++it_func_ptr)->func); \ + if (it_func_ptr) { \ + do { \ + it_func = (it_func_ptr)->func; \ + __data = (it_func_ptr)->data; \ + ((void(*)(void *, proto))(it_func))(__data, args); \ + } while ((++it_func_ptr)->func); \ + } \ return 0; \ } \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); diff --git a/include/linux/uio.h b/include/linux/uio.h index 72d88566694e..27ff8eb786dc 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -260,7 +260,13 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); + +struct csum_state { + __wsum csum; + size_t off; +}; + +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 80c0181c411d..cedcda6593f6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ -#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ +#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on 
unmap, can't be freed in atomic context */ +#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ /* * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. @@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */ * determine which allocations need the module shadow freed. */ -/* - * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with - * vfree_atomic(). - */ -#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ - /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c1504aa3d9cf..ba2f439bc04d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -238,6 +238,14 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + + /* + * When this quirk is set, then the hci_suspend_notifier is not + * registered. This is intended for devices which drop completely + * from the bus on system-suspend and which will show up as a new + * HCI after resume. 
+ */ + HCI_QUIRK_NO_SUSPEND_NOTIFIER, }; /* HCI device flags */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 677a8c50b2ad..ebdd4afe30d2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -105,6 +105,8 @@ enum suspend_tasks { SUSPEND_POWERING_DOWN, SUSPEND_PREPARE_NOTIFIER, + + SUSPEND_SET_ADV_FILTER, __SUSPEND_NUM_TASKS }; @@ -250,15 +252,31 @@ struct adv_pattern { __u8 value[HCI_MAX_AD_LENGTH]; }; +struct adv_rssi_thresholds { + __s8 low_threshold; + __s8 high_threshold; + __u16 low_threshold_timeout; + __u16 high_threshold_timeout; + __u8 sampling_period; +}; + struct adv_monitor { struct list_head patterns; - bool active; + struct adv_rssi_thresholds rssi; __u16 handle; + + enum { + ADV_MONITOR_STATE_NOT_REGISTERED, + ADV_MONITOR_STATE_REGISTERED, + ADV_MONITOR_STATE_OFFLOADED + } state; }; #define HCI_MIN_ADV_MONITOR_HANDLE 1 -#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 +#define HCI_MAX_ADV_MONITOR_NUM_HANDLES 32 #define HCI_MAX_ADV_MONITOR_NUM_PATTERNS 16 +#define HCI_ADV_MONITOR_EXT_NONE 1 +#define HCI_ADV_MONITOR_EXT_MSFT 2 #define HCI_MAX_SHORT_NAME_LENGTH 10 @@ -1316,10 +1334,15 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance); void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired); void hci_adv_monitors_clear(struct hci_dev *hdev); -void hci_free_adv_monitor(struct adv_monitor *monitor); -int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); -int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle); +void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor); +int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); +int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); +bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + int *err); +bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err); +bool 
hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err); bool hci_is_adv_monitoring(struct hci_dev *hdev); +int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev); void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb); @@ -1342,6 +1365,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define lmp_le_capable(dev) ((dev)->features[0][4] & LMP_LE) #define lmp_sniffsubr_capable(dev) ((dev)->features[0][5] & LMP_SNIFF_SUBR) #define lmp_pause_enc_capable(dev) ((dev)->features[0][5] & LMP_PAUSE_ENC) +#define lmp_esco_2m_capable(dev) ((dev)->features[0][5] & LMP_EDR_ESCO_2M) #define lmp_ext_inq_capable(dev) ((dev)->features[0][6] & LMP_EXT_INQ) #define lmp_le_br_capable(dev) (!!((dev)->features[0][6] & LMP_SIMUL_LE_BR)) #define lmp_ssp_capable(dev) ((dev)->features[0][6] & LMP_SIMPLE_PAIR) @@ -1794,7 +1818,10 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); +void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); +int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status); +int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 1d1232917de7..61800a7b6192 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -207,6 +207,7 @@ struct l2cap_hdr { __le16 len; __le16 cid; } __packed; +#define L2CAP_LEN_SIZE 2 #define L2CAP_HDR_SIZE 4 #define L2CAP_ENH_HDR_SIZE 6 #define L2CAP_EXT_HDR_SIZE 8 diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index f9a6638e20b3..839a2028009e 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -821,6 +821,22 @@ struct mgmt_rp_add_ext_adv_data { 
__u8 instance; } __packed; +struct mgmt_adv_rssi_thresholds { + __s8 high_threshold; + __le16 high_threshold_timeout; + __s8 low_threshold; + __le16 low_threshold_timeout; + __u8 sampling_period; +} __packed; + +#define MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI 0x0056 +struct mgmt_cp_add_adv_patterns_monitor_rssi { + struct mgmt_adv_rssi_thresholds rssi; + __u8 pattern_count; + struct mgmt_adv_pattern patterns[]; +} __packed; +#define MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE 8 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/include/net/devlink.h b/include/net/devlink.h index 47b4b063401b..853420db5d32 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1090,7 +1090,7 @@ enum devlink_trap_group_generic_id { #define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \ "blackhole_nexthop" #define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ - "dest_mac_filter" + "dmac_filter" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" diff --git a/include/net/dsa.h b/include/net/dsa.h index 60acb9fca124..d8de23ce7221 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -172,6 +172,10 @@ struct dsa_switch_tree { list_for_each_entry((_dp), &(_dst)->ports, list) \ if ((_dp)->lag_dev == (_lag)) +#define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ + list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ + if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) + static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, unsigned int id) { @@ -264,6 +268,7 @@ struct dsa_port { struct phylink_config pl_config; struct net_device *lag_dev; bool lag_tx_enabled; + struct net_device *hsr_dev; struct list_head list; @@ -769,6 +774,14 @@ struct dsa_switch_ops { struct netdev_lag_upper_info *info); int (*port_lag_leave)(struct dsa_switch *ds, int port, struct net_device *lag); + + /* + * HSR integration + */ + int (*port_hsr_join)(struct dsa_switch *ds, int port, + struct net_device *hsr); + int (*port_hsr_leave)(struct 
dsa_switch *ds, int port, + struct net_device *hsr); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e55ec1597ce7..7cb3fa8310ed 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -14,6 +14,7 @@ */ struct genl_multicast_group { char name[GENL_NAMSIZ]; + u8 flags; }; struct genl_ops; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1e262b23c68b..15b7fbe6b15c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -195,7 +195,8 @@ struct fib6_info { fib6_destroying:1, offload:1, trap:1, - unused:2; + offload_failed:1, + unused:1; struct rcu_head rcu; struct nexthop *nh; @@ -539,7 +540,7 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, - bool offload, bool trap); + bool offload, bool trap, bool offload_failed); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2ec062aaa978..a914f33f3ed5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -213,7 +213,8 @@ struct fib_rt_info { u8 type; u8 offload:1, trap:1, - unused:6; + offload_failed:1, + unused:5; }; struct fib_entry_notifier_info { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 29567875f428..dcaee24a4d87 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -165,7 +165,7 @@ struct net { struct netns_xfrm xfrm; #endif - atomic64_t net_cookie; /* written once */ + u64 net_cookie; /* written once */ #if IS_ENABLED(CONFIG_IP_VS) struct netns_ipvs *ipvs; @@ -224,8 +224,6 @@ extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); struct net *get_net_ns_by_fd(int fd); -u64 __net_gen_cookie(struct net *net); - #ifdef CONFIG_SYSCTL void 
ipx_register_sysctl(void); void ipx_unregister_sysctl(void); diff --git a/include/net/sock.h b/include/net/sock.h index 690e496a0e79..855c068c6c86 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -226,7 +226,7 @@ struct sock_common { struct hlist_nulls_node skc_nulls_node; }; unsigned short skc_tx_queue_mapping; -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING unsigned short skc_rx_queue_mapping; #endif union { @@ -356,7 +356,7 @@ struct sock { #define sk_nulls_node __sk_common.skc_nulls_node #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING #define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping #endif @@ -1838,7 +1838,7 @@ static inline int sk_tx_queue_get(const struct sock *sk) static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) { -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (skb_rx_queue_recorded(skb)) { u16 rx_queue = skb_get_rx_queue(skb); @@ -1852,20 +1852,20 @@ static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb) static inline void sk_rx_queue_clear(struct sock *sk) { -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING; #endif } -#ifdef CONFIG_XPS static inline int sk_rx_queue_get(const struct sock *sk) { +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING) return sk->sk_rx_queue_mapping; +#endif return -1; } -#endif static inline void sk_set_socket(struct sock *sk, struct socket *sock) { diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 88fcac140966..6dcfc4c51a6e 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -28,7 +28,6 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, #if IS_ENABLED(CONFIG_BRIDGE_MRP) - SWITCHDEV_ATTR_ID_MRP_PORT_STATE, SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, #endif }; @@ 
-48,7 +47,6 @@ struct switchdev_attr { u16 vlan_protocol; /* BRIDGE_VLAN_PROTOCOL */ bool mc_disabled; /* MC_DISABLED */ #if IS_ENABLED(CONFIG_BRIDGE_MRP) - u8 mrp_port_state; /* MRP_PORT_STATE */ u8 mrp_port_role; /* MRP_PORT_ROLE */ #endif } u; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index d0d48e9620fb..bfce3df61bfd 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -730,6 +730,7 @@ struct ocelot_policer { /* I/O */ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); +void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg); u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, @@ -758,6 +759,7 @@ int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset); int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); +int ocelot_port_flush(struct ocelot *ocelot, int port); void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled); diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 9c8604c5b5f6..ea4692c339ce 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index bdb317faa1dc..35dc016c9bb4 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: MIT */ -/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Matthias Schiffer */ diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 3674a451a18c..c91578aaab32 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -36,6 +36,7 @@ enum { /* netlink interface */ #define MPTCP_PM_NAME "mptcp_pm" #define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds" +#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events" #define MPTCP_PM_VER 0x1 /* @@ -104,4 +105,77 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; }; +/* + * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A new MPTCP connection has been created. It is the good time to allocate + * memory and send ADD_ADDR if needed. Depending on the traffic-patterns + * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent. + * + * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A MPTCP connection is established (can start new subflows). + * + * MPTCP_EVENT_CLOSED: token + * A MPTCP connection has stopped. + * + * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] + * A new address has been announced by the peer. + * + * MPTCP_EVENT_REMOVED: token, rem_id + * An address has been lost by the peer. + * + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, + * if_idx [, error] + * A new subflow has been established. 'error' should not be set. + * + * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * A subflow has been closed. 
An error (copy of sk_err) could be set if an + * error has been detected for this subflow. + * + * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * The priority of a subflow has changed. 'error' should not be set. + */ +enum mptcp_event_type { + MPTCP_EVENT_UNSPEC = 0, + MPTCP_EVENT_CREATED = 1, + MPTCP_EVENT_ESTABLISHED = 2, + MPTCP_EVENT_CLOSED = 3, + + MPTCP_EVENT_ANNOUNCED = 6, + MPTCP_EVENT_REMOVED = 7, + + MPTCP_EVENT_SUB_ESTABLISHED = 10, + MPTCP_EVENT_SUB_CLOSED = 11, + + MPTCP_EVENT_SUB_PRIORITY = 13, +}; + +enum mptcp_event_attr { + MPTCP_ATTR_UNSPEC = 0, + + MPTCP_ATTR_TOKEN, /* u32 */ + MPTCP_ATTR_FAMILY, /* u16 */ + MPTCP_ATTR_LOC_ID, /* u8 */ + MPTCP_ATTR_REM_ID, /* u8 */ + MPTCP_ATTR_SADDR4, /* be32 */ + MPTCP_ATTR_SADDR6, /* struct in6_addr */ + MPTCP_ATTR_DADDR4, /* be32 */ + MPTCP_ATTR_DADDR6, /* struct in6_addr */ + MPTCP_ATTR_SPORT, /* be16 */ + MPTCP_ATTR_DPORT, /* be16 */ + MPTCP_ATTR_BACKUP, /* u8 */ + MPTCP_ATTR_ERROR, /* u8 */ + MPTCP_ATTR_FLAGS, /* u16 */ + MPTCP_ATTR_TIMEOUT, /* u32 */ + MPTCP_ATTR_IF_IDX, /* s32 */ + + __MPTCP_ATTR_AFTER_LAST +}; + +#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1) + #endif /* _UAPI_MPTCP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 90deb41c8a34..667f1aed091c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -251,5 +251,8 @@ struct prctl_mm_map { #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif /* _LINUX_PRCTL_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index b841caa4657e..91e4ca064d61 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -319,6 +319,11 @@ enum rt_scope_t { #define 
RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ #define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ #define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ /* Reserved table identifiers */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 42fc5a640df4..8fc09e8638b3 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -357,6 +357,6 @@ struct tcp_zerocopy_receive { __u64 msg_control; /* ancillary data */ __u64 msg_controllen; __u32 msg_flags; - /* __u32 hole; Next we must add >1 u32 otherwise length checks fail. */ + __u32 reserved; /* set to 0 for now */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/include/xen/events.h b/include/xen/events.h index 8ec418e30c7f..c204262d9fc2 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -12,10 +12,11 @@ #include <asm/xen/hypercall.h> #include <asm/xen/events.h> +struct xenbus_device; + unsigned xen_evtchn_nr_channels(void); int bind_evtchn_to_irq(evtchn_port_t evtchn); -int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, @@ -35,9 +36,9 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, unsigned long irqflags, const char *devname, void *dev_id); -int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, +int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, evtchn_port_t remote_port); -int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, +int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev, evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, diff --git a/init/Kconfig b/init/Kconfig index b77c60f8b963..29ad68325028 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ 
-76,7 +76,6 @@ config CC_HAS_ASM_INLINE config CONSTRUCTORS bool - depends on !UML config IRQ_WORK bool diff --git a/init/init_task.c b/init/init_task.c index 8a992d73e6fb..3711cdaafed2 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -198,7 +198,8 @@ struct task_struct init_task .lockdep_recursion = 0, #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .ret_stack = NULL, + .ret_stack = NULL, + .tracing_graph_pause = ATOMIC_INIT(0), #endif #if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION) .trace_recursion = 0, diff --git a/init/main.c b/init/main.c index c68d784376ca..a626e78dbf06 100644 --- a/init/main.c +++ b/init/main.c @@ -1066,7 +1066,13 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) /* Call all constructor functions linked into the kernel. */ static void __init do_ctors(void) { -#ifdef CONFIG_CONSTRUCTORS +/* + * For UML, the constructors have already been called by the + * normal setup code as it's just a normal ELF binary, so we + * cannot do it again - but we do need CONFIG_CONSTRUCTORS + * even on UML for modules. 
+ */ +#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML) ctor_fn_t *fn = (ctor_fn_t *) __ctors_start; for (; fn < (ctor_fn_t *) __ctors_end; fn++) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index cabaf7db8efc..be35bfb7fb13 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -114,6 +114,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) /* hash table size must be power of 2 */ n_buckets = roundup_pow_of_two(attr->max_entries); + if (!n_buckets) + return ERR_PTR(-E2BIG); cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 785d25392ead..1cffd4e84725 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6918,7 +6918,7 @@ static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) case BPF_JSGT: if (reg->s32_min_value > sval) return 1; - else if (reg->s32_max_value < sval) + else if (reg->s32_max_value <= sval) return 0; break; case BPF_JLT: @@ -6991,7 +6991,7 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) case BPF_JSGT: if (reg->smin_value > sval) return 1; - else if (reg->smax_value < sval) + else if (reg->smax_value <= sval) return 0; break; case BPF_JLT: @@ -8631,7 +8631,11 @@ static bool range_within(struct bpf_reg_state *old, return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value && old->smin_value <= cur->smin_value && - old->smax_value >= cur->smax_value; + old->smax_value >= cur->smax_value && + old->u32_min_value <= cur->u32_min_value && + old->u32_max_value >= cur->u32_max_value && + old->s32_min_value <= cur->s32_min_value && + old->s32_max_value >= cur->s32_max_value; } /* Maximum number of register states that can exist at once */ @@ -11117,30 +11121,28 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == 
(BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; - struct bpf_insn mask_and_div[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_div[] = { /* Rx div 0 -> 0 */ - BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JNE | BPF_K, insn->src_reg, + 0, 2, 0), BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg), BPF_JMP_IMM(BPF_JA, 0, 0, 1), *insn, }; - struct bpf_insn mask_and_mod[] = { - BPF_MOV32_REG(insn->src_reg, insn->src_reg), + struct bpf_insn chk_and_mod[] = { /* Rx mod 0 -> Rx */ - BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, insn->src_reg, + 0, 1, 0), *insn, }; - struct bpf_insn *patchlet; - if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || - insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { - patchlet = mask_and_div + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0); - } else { - patchlet = mask_and_mod + (is64 ? 1 : 0); - cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0); - } + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? 
ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod); new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 1b1b8ff875cb..da95df381483 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -36,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; struct map_benchmark_data { diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 6dd82be60df8..f9d491b17b78 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -209,15 +209,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) lockdep_sys_exit(); } -#ifndef _TIF_SINGLESTEP -static inline bool report_single_step(unsigned long work) -{ - return false; -} -#else /* * If SYSCALL_EMU is set, then the only reason to report is when - * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall + * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall * instruction has been already reported in syscall_enter_from_user_mode(). 
*/ static inline bool report_single_step(unsigned long work) @@ -225,10 +219,8 @@ static inline bool report_single_step(unsigned long work) if (work & SYSCALL_WORK_SYSCALL_EMU) return false; - return !!(current_thread_info()->flags & _TIF_SINGLESTEP); + return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; } -#endif - static void syscall_exit_work(struct pt_regs *regs, unsigned long work) { diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c index b0338a5625d9..c240302f56e2 100644 --- a/kernel/entry/syscall_user_dispatch.c +++ b/kernel/entry/syscall_user_dispatch.c @@ -50,10 +50,10 @@ bool syscall_user_dispatch(struct pt_regs *regs) if (unlikely(__get_user(state, sd->selector))) do_exit(SIGSEGV); - if (likely(state == PR_SYS_DISPATCH_OFF)) + if (likely(state == SYSCALL_DISPATCH_FILTER_ALLOW)) return false; - if (state != PR_SYS_DISPATCH_ON) + if (state != SYSCALL_DISPATCH_FILTER_BLOCK) do_exit(SIGSYS); } diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 3110c77230c7..f62de2dea8a3 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS - select CONSTRUCTORS if !UML + select CONSTRUCTORS default n help This option enables gcov-based code profiling (e.g. for code coverage diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index dc0e2d7fbdfd..b338d622f26e 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -436,22 +436,22 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, can_reserve = msi_check_reservation_mode(domain, info, dev); - for_each_msi_entry(desc, dev) { - virq = desc->irq; - if (desc->nvec_used == 1) - dev_dbg(dev, "irq %d for MSI\n", virq); - else + /* + * This flag is set by the PCI layer as we need to activate + * the MSI entries before the PCI layer enables MSI in the + * card. Otherwise the card latches a random msi message. 
+ */ + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + goto skip_activate; + + for_each_msi_vector(desc, i, dev) { + if (desc->irq == i) { + virq = desc->irq; dev_dbg(dev, "irq [%d-%d] for MSI\n", virq, virq + desc->nvec_used - 1); - /* - * This flag is set by the PCI layer as we need to activate - * the MSI entries before the PCI layer enables MSI in the - * card. Otherwise the card latches a random msi message. - */ - if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) - continue; + } - irq_data = irq_domain_get_irq_data(domain, desc->irq); + irq_data = irq_domain_get_irq_data(domain, i); if (!can_reserve) { irqd_clr_can_reserve(irq_data); if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) @@ -462,28 +462,24 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, goto cleanup; } +skip_activate: /* * If these interrupts use reservation mode, clear the activated bit * so request_irq() will assign the final vector. */ if (can_reserve) { - for_each_msi_entry(desc, dev) { - irq_data = irq_domain_get_irq_data(domain, desc->irq); + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); irqd_clr_activated(irq_data); } } return 0; cleanup: - for_each_msi_entry(desc, dev) { - struct irq_data *irqd; - - if (desc->irq == virq) - break; - - irqd = irq_domain_get_irq_data(domain, desc->irq); - if (irqd_is_activated(irqd)) - irq_domain_deactivate_irq(irqd); + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + if (irqd_is_activated(irq_data)) + irq_domain_deactivate_irq(irq_data); } msi_domain_free_irqs(domain, dev); return ret; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f7fb5d135930..d5a3eb74a657 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1954,28 +1954,48 @@ bool __weak arch_kprobe_on_func_entry(unsigned long offset) return !offset; } -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +/** + * kprobe_on_func_entry() -- check whether 
given address is function entry + * @addr: Target address + * @sym: Target symbol name + * @offset: The offset from the symbol or the address + * + * This checks whether the given @addr+@offset or @sym+@offset is on the + * function entry address or not. + * This returns 0 if it is the function entry, or -EINVAL if it is not. + * And also it returns -ENOENT if it fails the symbol or address lookup. + * Caller must pass @addr or @sym (either one must be NULL), or this + * returns -EINVAL. + */ +int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) { kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); if (IS_ERR(kp_addr)) - return false; + return PTR_ERR(kp_addr); - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || - !arch_kprobe_on_func_entry(offset)) - return false; + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset)) + return -ENOENT; - return true; + if (!arch_kprobe_on_func_entry(offset)) + return -EINVAL; + + return 0; } int register_kretprobe(struct kretprobe *rp) { - int ret = 0; + int ret; struct kretprobe_instance *inst; int i; void *addr; - if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) + ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset); + if (ret) + return ret; + + /* If only rp->kp.addr is specified, check reregistering kprobes */ + if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) return -EINVAL; if (kretprobe_blacklist_size) { diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 87389b9e21ab..5247afd7f345 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -502,7 +502,7 @@ static struct hrtimer sync_hrtimer; static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer) { - queue_work(system_power_efficient_wq, &sync_work); + queue_work(system_freezable_power_efficient_wq, &sync_work); return HRTIMER_NORESTART; } @@ -668,7 +668,7 @@ void ntp_notify_cmos_timer(void) * just a pointless work 
scheduled. */ if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) - queue_work(system_power_efficient_wq, &sync_work); + queue_work(system_freezable_power_efficient_wq, &sync_work); } static void __init ntp_init_cmos_sync(void) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6c0018abe68a..764400260eb6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -96,9 +96,6 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; - if (in_nmi()) /* not supported yet */ - return 1; - cant_sleep(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 73edb9e4f354..29a6ebeebc9e 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -394,7 +394,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->curr_ret_stack = -1; t->curr_ret_depth = -1; @@ -489,7 +488,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); static void graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; /* make curr_ret_stack visible before we add the ret_stack */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e9d28eeccb7e..d387b774ceeb 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1212,7 +1212,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!trace_event_name(call) || !call->class || !call->class->reg) + if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || + !trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) 
!= 0) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d06aab4dcbb8..6756379b661f 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -562,6 +562,8 @@ static int __irqsoff_tracer_init(struct trace_array *tr) /* non overwrite screws up the latency tracers */ set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1); set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1); + /* without pause, we will produce garbage if another latency occurs */ + set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, 1); tr->max_latency = 0; irqsoff_trace = tr; @@ -583,11 +585,13 @@ static void __irqsoff_tracer_reset(struct trace_array *tr) { int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; + int pause_flag = save_flags & TRACE_ITER_PAUSE_ON_TRACE; stop_irqsoff_tracer(tr, is_graph(tr)); set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag); set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag); + set_tracer_flag(tr, TRACE_ITER_PAUSE_ON_TRACE, pause_flag); ftrace_reset_array_ops(tr); irqsoff_busy = false; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index e6fba1798771..56c7fbff7bd7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -221,9 +221,9 @@ bool trace_kprobe_on_func_entry(struct trace_event_call *call) { struct trace_kprobe *tk = trace_kprobe_primary_from_call(call); - return tk ? kprobe_on_func_entry(tk->rp.kp.addr, + return tk ? (kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, - tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false; + tk->rp.kp.addr ? 
0 : tk->rp.kp.offset) == 0) : false; } bool trace_kprobe_error_injectable(struct trace_event_call *call) @@ -828,9 +828,11 @@ static int trace_kprobe_create(int argc, const char *argv[]) } if (is_return) flags |= TPARG_FL_RETURN; - if (kprobe_on_func_entry(NULL, symbol, offset)) + ret = kprobe_on_func_entry(NULL, symbol, offset); + if (ret == 0) flags |= TPARG_FL_FENTRY; - if (offset && is_return && !(flags & TPARG_FL_FENTRY)) { + /* Defer the ENOENT case until register kprobe */ + if (ret == -EINVAL && is_return) { trace_probe_log_err(0, BAD_RETPROBE); goto parse_error; } diff --git a/lib/cpumask.c b/lib/cpumask.c index 35924025097b..c3c76b833384 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <linux/memblock.h> #include <linux/numa.h> -#include <linux/sched/isolation.h> /** * cpumask_next - get the next cpu in a cpumask @@ -206,27 +205,22 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) */ unsigned int cpumask_local_spread(unsigned int i, int node) { - int cpu, hk_flags; - const struct cpumask *mask; + int cpu; - hk_flags = HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ; - mask = housekeeping_cpumask(hk_flags); /* Wrap: we always want a cpu. */ - i %= cpumask_weight(mask); + i %= num_online_cpus(); if (node == NUMA_NO_NODE) { - for_each_cpu(cpu, mask) { + for_each_cpu(cpu, cpu_online_mask) if (i-- == 0) return cpu; - } } else { /* NUMA first. */ - for_each_cpu_and(cpu, cpumask_of_node(node), mask) { + for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask) if (i-- == 0) return cpu; - } - for_each_cpu(cpu, mask) { + for_each_cpu(cpu, cpu_online_mask) { /* Skip NUMA nodes, done above. 
*/ if (cpumask_test_cpu(cpu, cpumask_of_node(node))) continue; diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a21e6a5792c5..f0b2ccb1bb01 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -592,14 +592,15 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, } static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, - __wsum *csum, struct iov_iter *i) + struct csum_state *csstate, + struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; unsigned int p_mask = pipe->ring_size - 1; + __wsum sum = csstate->csum; + size_t off = csstate->off; unsigned int i_head; size_t n, r; - size_t off = 0; - __wsum sum = *csum; if (!sanity(i)) return 0; @@ -621,7 +622,8 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, i_head++; } while (n); i->count -= bytes; - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } @@ -1522,18 +1524,19 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, } EXPORT_SYMBOL(csum_and_copy_from_iter_full); -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, struct iov_iter *i) { + struct csum_state *csstate = _csstate; const char *from = addr; - __wsum *csum = csump; __wsum sum, next; - size_t off = 0; + size_t off; if (unlikely(iov_iter_is_pipe(i))) - return csum_and_copy_to_pipe_iter(addr, bytes, csum, i); + return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); - sum = *csum; + sum = csstate->csum; + off = csstate->off; if (unlikely(iov_iter_is_discard(i))) { WARN_ON(1); /* for now */ return 0; @@ -1561,7 +1564,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, off += v.iov_len; }) ) - *csum = sum; + csstate->csum = sum; + csstate->off = off; return bytes; } EXPORT_SYMBOL(csum_and_copy_to_iter); diff --git a/lib/ubsan.c b/lib/ubsan.c index 3e3352f3d0da..bec38c64d6a6 100644 --- a/lib/ubsan.c +++ 
b/lib/ubsan.c @@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); + +void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, + unsigned long align, + unsigned long offset); +void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, + unsigned long align, + unsigned long offset) +{ + struct alignment_assumption_data *data = _data; + unsigned long real_ptr; + + if (suppress_report(&data->location)) + return; + + ubsan_prologue(&data->location, "alignment-assumption"); + + if (offset) + pr_err("assumption of %lu byte alignment (with offset of %lu byte) for pointer of type %s failed", + align, offset, data->type->type_name); + else + pr_err("assumption of %lu byte alignment for pointer of type %s failed", + align, data->type->type_name); + + real_ptr = ptr - offset; + pr_err("%saddress is %lu aligned, misalignment offset is %lu bytes", + offset ? "offset " : "", BIT(real_ptr ? 
__ffs(real_ptr) : 0), + real_ptr & (align - 1)); + + ubsan_epilogue(); +} +EXPORT_SYMBOL(__ubsan_handle_alignment_assumption); diff --git a/lib/ubsan.h b/lib/ubsan.h index 7b56c09473a9..9a0b71c5ff9f 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -78,6 +78,12 @@ struct invalid_value_data { struct type_descriptor *type; }; +struct alignment_assumption_data { + struct source_location location; + struct source_location assumption_location; + struct type_descriptor *type; +}; + #if defined(CONFIG_ARCH_SUPPORTS_INT128) typedef __int128 s_max; typedef unsigned __int128 u_max; diff --git a/mm/compaction.c b/mm/compaction.c index e5acb9714436..190ccdaa6c19 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1342,7 +1342,7 @@ fast_isolate_freepages(struct compact_control *cc) { unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1); unsigned int nr_scanned = 0; - unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0; + unsigned long low_pfn, min_pfn, highest = 0; unsigned long nr_isolated = 0; unsigned long distance; struct page *page = NULL; @@ -1387,6 +1387,7 @@ fast_isolate_freepages(struct compact_control *cc) struct page *freepage; unsigned long flags; unsigned int order_scanned = 0; + unsigned long high_pfn = 0; if (!area->nr_free) continue; diff --git a/mm/filemap.c b/mm/filemap.c index 5c9d564317a5..aa0e0fb04670 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(struct page *page, XA_STATE(xas, &mapping->i_pages, offset); int huge = PageHuge(page); int error; + bool charged = false; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); @@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(struct page *page, error = mem_cgroup_charge(page, current->mm, gfp); if (error) goto error; + charged = true; } gfp &= GFP_RECLAIM_MASK; @@ -896,6 +898,8 @@ unlock: if (xas_error(&xas)) { error = xas_error(&xas); + if (charged) + mem_cgroup_uncharge(page); goto error; } diff --git 
a/mm/huge_memory.c b/mm/huge_memory.c index 9237976abe72..91ca9b103ee5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2202,7 +2202,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { spinlock_t *ptl; struct mmu_notifier_range range; - bool was_locked = false; + bool do_unlock_page = false; pmd_t _pmd; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, @@ -2218,7 +2218,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, VM_BUG_ON(freeze && !page); if (page) { VM_WARN_ON_ONCE(!PageLocked(page)); - was_locked = true; if (page != pmd_page(*pmd)) goto out; } @@ -2227,19 +2226,29 @@ repeat: if (pmd_trans_huge(*pmd)) { if (!page) { page = pmd_page(*pmd); - if (unlikely(!trylock_page(page))) { - get_page(page); - _pmd = *pmd; - spin_unlock(ptl); - lock_page(page); - spin_lock(ptl); - if (unlikely(!pmd_same(*pmd, _pmd))) { - unlock_page(page); + /* + * An anonymous page must be locked, to ensure that a + * concurrent reuse_swap_page() sees stable mapcount; + * but reuse_swap_page() is not used on shmem or file, + * and page lock must not be taken when zap_pmd_range() + * calls __split_huge_pmd() while i_mmap_lock is held. + */ + if (PageAnon(page)) { + if (unlikely(!trylock_page(page))) { + get_page(page); + _pmd = *pmd; + spin_unlock(ptl); + lock_page(page); + spin_lock(ptl); + if (unlikely(!pmd_same(*pmd, _pmd))) { + unlock_page(page); + put_page(page); + page = NULL; + goto repeat; + } put_page(page); - page = NULL; - goto repeat; } - put_page(page); + do_unlock_page = true; } } if (PageMlocked(page)) @@ -2249,7 +2258,7 @@ repeat: __split_huge_pmd_locked(vma, pmd, range.start, freeze); out: spin_unlock(ptl); - if (!was_locked && page) + if (do_unlock_page) unlock_page(page); /* * No need to double call mmu_notifier->invalidate_range() callback. 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 18f6ee317900..4bdb58ab14cb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock); static int num_fault_mutexes; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; +static inline bool PageHugeFreed(struct page *head) +{ + return page_private(head + 4) == -1UL; +} + +static inline void SetPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, -1UL); +} + +static inline void ClearPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, 0); +} + /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); @@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) list_move(&page->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; + SetPageHugeFreed(page); } static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) @@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) list_move(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); + ClearPageHugeFreed(page); h->free_huge_pages--; h->free_huge_pages_node[nid]--; return page; @@ -1344,12 +1361,11 @@ struct hstate *size_to_hstate(unsigned long size) */ bool page_huge_active(struct page *page) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - return PageHead(page) && PagePrivate(&page[1]); + return PageHeadHuge(page) && PagePrivate(&page[1]); } /* never called for tail page */ -static void set_page_huge_active(struct page *page) +void set_page_huge_active(struct page *page) { VM_BUG_ON_PAGE(!PageHeadHuge(page), page); SetPagePrivate(&page[1]); @@ -1505,6 +1521,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) spin_lock(&hugetlb_lock); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; + ClearPageHugeFreed(page); spin_unlock(&hugetlb_lock); } @@ -1755,6 +1772,7 @@ int dissolve_free_huge_page(struct page *page) { 
int rc = -EBUSY; +retry: /* Not to disrupt normal path by vainly holding hugetlb_lock */ if (!PageHuge(page)) return 0; @@ -1771,6 +1789,26 @@ int dissolve_free_huge_page(struct page *page) int nid = page_to_nid(head); if (h->free_huge_pages - h->resv_huge_pages == 0) goto out; + + /* + * We should make sure that the page is already on the free list + * when it is dissolved. + */ + if (unlikely(!PageHugeFreed(head))) { + spin_unlock(&hugetlb_lock); + cond_resched(); + + /* + * Theoretically, we should return -EBUSY when we + * encounter this race. In fact, we have a chance + * to successfully dissolve the page if we do a + * retry. Because the race window is quite small. + * If we seize this opportunity, it is an optimization + * for increasing the success rate of dissolving page. + */ + goto retry; + } + /* * Move PageHWPoison flag from head page to the raw error page, * which makes any subpages rather than the error page reusable. @@ -2009,13 +2047,16 @@ retry: /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { + int zeroed; + if ((--needed) < 0) break; /* * This page is now managed by the hugetlb allocator and has * no users -- drop the buddy allocator's reference. 
*/ - VM_BUG_ON_PAGE(!put_page_testzero(page), page); + zeroed = put_page_testzero(page); + VM_BUG_ON_PAGE(!zeroed, page); enqueue_huge_page(h, page); } free: @@ -5555,9 +5596,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list) { bool ret = true; - VM_BUG_ON_PAGE(!PageHead(page), page); spin_lock(&hugetlb_lock); - if (!page_huge_active(page) || !get_page_unless_zero(page)) { + if (!PageHeadHuge(page) || !page_huge_active(page) || + !get_page_unless_zero(page)) { ret = false; goto unlock; } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index e529428e7a11..d558799b25b3 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -134,12 +134,8 @@ void __init kasan_init_hw_tags(void) switch (kasan_arg_stacktrace) { case KASAN_ARG_STACKTRACE_DEFAULT: - /* - * Default to enabling stack trace collection for - * debug kernels. - */ - if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) - static_branch_enable(&kasan_flag_stacktrace); + /* Default to enabling stack trace collection. */ + static_branch_enable(&kasan_flag_stacktrace); break; case KASAN_ARG_STACKTRACE_OFF: /* Do nothing, kasan_flag_stacktrace keeps its default value. */ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index cc4d9e1d49b1..8c706e7652f2 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -209,7 +209,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, static inline bool addr_has_metadata(const void *addr) { - return true; + return (is_vmalloc_addr(addr) || virt_addr_valid(addr)); } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ diff --git a/mm/memblock.c b/mm/memblock.c index 1eaaec1e7687..8d9b5f1e7040 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, * * Find @size free area aligned to @align in the specified range and node. * - * When allocation direction is bottom-up, the @start should be greater - * than the end of the kernel image. Otherwise, it will be trimmed. 
The - * reason is that we want the bottom-up allocation just near the kernel - * image so it is highly likely that the allocated memory and the kernel - * will reside in the same node. - * - * If bottom-up allocation failed, will try to allocate memory top-down. - * * Return: * Found address on success, 0 on failure. */ @@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, phys_addr_t end, int nid, enum memblock_flags flags) { - phys_addr_t kernel_end, ret; - /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE || end == MEMBLOCK_ALLOC_KASAN) @@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, /* avoid allocating the first page */ start = max_t(phys_addr_t, start, PAGE_SIZE); end = max(start, end); - kernel_end = __pa_symbol(_end); - - /* - * try bottom-up allocation only when bottom-up mode - * is set and @end is above the kernel image. - */ - if (memblock_bottom_up() && end > kernel_end) { - phys_addr_t bottom_up_start; - - /* make sure we will allocate above the kernel */ - bottom_up_start = max(start, kernel_end); - /* ok, try bottom-up allocation first */ - ret = __memblock_find_range_bottom_up(bottom_up_start, end, - size, align, nid, flags); - if (ret) - return ret; - - /* - * we always limit bottom-up allocation above the kernel, - * but top-down allocation doesn't have the limit, so - * retrying top-down allocation may succeed when bottom-up - * allocation failed. - * - * bottom-up allocation is expected to be fail very rarely, - * so we use WARN_ONCE() here to see the stack trace if - * fail happens. 
- */ - WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE), - "memblock: bottom-up allocation failed, memory hotremove may be affected\n"); - } - - return __memblock_find_range_top_down(start, end, size, align, nid, - flags); + if (memblock_bottom_up()) + return __memblock_find_range_bottom_up(start, end, size, align, + nid, flags); + else + return __memblock_find_range_top_down(start, end, size, align, + nid, flags); } /** diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2de77b5bcc2..913c2b9e5c72 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6271,6 +6271,8 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, if (err) return err; + page_counter_set_high(&memcg->memory, high); + for (;;) { unsigned long nr_pages = page_counter_read(&memcg->memory); unsigned long reclaimed; @@ -6294,10 +6296,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, break; } - page_counter_set_high(&memcg->memory, high); - memcg_wb_domain_size_changed(memcg); - return nbytes; } diff --git a/mm/migrate.c b/mm/migrate.c index c0efe921bca5..20ca887ea769 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1280,6 +1280,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, return -ENOSYS; } + if (page_count(hpage) == 1) { + /* page was freed from under us. So we are done. */ + putback_active_hugepage(hpage); + return MIGRATEPAGE_SUCCESS; + } + new_hpage = get_new_page(hpage, private); if (!new_hpage) return -ENOMEM; diff --git a/mm/mremap.c b/mm/mremap.c index f554320281cc..aa63bfd3cad2 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -336,8 +336,9 @@ enum pgt_entry { * valid. Else returns a smaller extent bounded by the end of the source and * destination pgt_entry. 
*/ -static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr, - unsigned long old_end, unsigned long new_addr) +static __always_inline unsigned long get_extent(enum pgt_entry entry, + unsigned long old_addr, unsigned long old_end, + unsigned long new_addr) { unsigned long next, extent, mask, size; diff --git a/mm/slub.c b/mm/slub.c index 7ecbbbe5bc0c..b22a4b101c84 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3423,6 +3423,7 @@ static inline int calculate_order(unsigned int size) unsigned int order; unsigned int min_objects; unsigned int max_objects; + unsigned int nr_cpus; /* * Attempt to find best configuration for a slab. This @@ -3433,8 +3434,21 @@ static inline int calculate_order(unsigned int size) * we reduce the minimum objects required in a slab. */ min_objects = slub_min_objects; - if (!min_objects) - min_objects = 4 * (fls(num_online_cpus()) + 1); + if (!min_objects) { + /* + * Some architectures will only update present cpus when + * onlining them, so don't trust the number if it's just 1. But + * we also don't want to use nr_cpu_ids always, as on some other + * architectures, there can be many possible cpus, but never + * onlined. Here we compromise between trying to avoid too high + * order on systems that appear larger than they are, and too + * low order on systems that appear smaller than they are. 
+ */ + nr_cpus = num_present_cpus(); + if (nr_cpus <= 1) + nr_cpus = nr_cpu_ids; + min_objects = 4 * (fls(nr_cpus) + 1); + } max_objects = order_objects(slub_max_order, size); min_objects = min(min_objects, max_objects); diff --git a/net/Kconfig b/net/Kconfig index f4c32d982af6..8cea808ad9e8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -256,9 +256,13 @@ config RFS_ACCEL select CPU_RMAP default y +config SOCK_RX_QUEUE_MAPPING + bool + config XPS bool depends on SMP + select SOCK_RX_QUEUE_MAPPING default y config HWBM diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 43ae3dcbbbeb..860a0786bc1e 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +# Copyright (C) B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 8010c34b987c..3bd0760c76a2 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +# Copyright (C) B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index c5f404f6892f..4eee53d19eb0 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 43b045ac8ac7..2c486374af58 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Linus Lüssing */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 168621c9a081..a5e313cd6f44 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h index 0c57c1000c64..04b01bd684e8 100644 --- a/net/batman-adv/bat_iv_ogm.h +++ b/net/batman-adv/bat_iv_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index e4455babe4c2..e1ca2b8c3152 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner */ diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h index 5e0be10bc84e..964431f4dc8d 100644 --- a/net/batman-adv/bat_v.h +++ b/net/batman-adv/bat_v.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing */ diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 0512ea6cd818..423c2d171703 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Linus Lüssing, Marek Lindner */ diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 4358d436be2a..9e2740195fa2 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner */ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 798d659855d0..a0a9636d1740 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Antonio Quartulli */ diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index 0ae2575f70bb..edeffedecade 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Antonio Quartulli */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index 4bc695cda397..649c41f393e1 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 533c6d44cb58..37f7ae413bc6 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner */ diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index d2de12e527ba..360bdbf44748 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich */ diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 7dc6d3571925..5c22955bb9d5 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich */ diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index fd7ba6bbdf85..8c95a11a830a 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Antonio Quartulli */ @@ -87,7 +87,7 @@ struct batadv_dhcp_packet { __u8 sname[64]; __u8 file[128]; __be32 magic; - __u8 options[]; + /* __u8 options[]; */ }; #define BATADV_DHCP_YIADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->yiaddr) @@ -1564,7 +1564,7 @@ static int batadv_dat_get_dhcp_message_type(struct sk_buff *skb) } /** - * batadv_dat_get_dhcp_yiaddr() - get yiaddr from a DHCP packet + * batadv_dat_dhcp_get_yiaddr() - get yiaddr from a DHCP packet * @skb: the DHCP packet to parse * @buf: a buffer to store the yiaddr in * diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index e980fb45693a..bed7f3d20844 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Antonio Quartulli */ diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index e522f1fcfd9a..a5d9d800082b 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> */ diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 881ef328b6cd..dbf0871f8703 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Martin Hundebøll <martin@hundeboll.net> */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index cffe72f4edd7..007f2827935d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 2fbc500f0ac1..2ae5846ef958 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 16cd9450ceb1..fdde305a198e 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index c3a0c5a7f7e9..87c37f907261 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 0f186ddc15e3..4a6a25d551a8 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index f4b8e9efef19..83d11b46a9d8 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 68638e0450a6..8016e619787f 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 91ae9f32b580..46696759f194 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index b7e9923b11a2..f0e5d1429662 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 979864c0fa6b..6717c965f0fa 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index ed9d87ce3407..e48f7ac8a854 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 288201630ceb..8f0102b71656 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2021.0" +#define BATADV_SOURCE_VERSION "2021.1" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 854e5ff28a3f..28166402d30c 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Linus Lüssing */ @@ -828,7 +828,7 @@ batadv_mcast_bridge_log(struct batadv_priv *bat_priv, } /** - * batadv_mcast_flags_logs() - output debug information about mcast flag changes + * batadv_mcast_flags_log() - output debug information about mcast flag changes * @bat_priv: the bat priv with all the soft interface information * @flags: TVLV flags indicating the new multicast state * diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index d61593d02072..9fee5da08311 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2014-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Linus Lüssing */ diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 97bcf149633d..f317d206b411 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Matthias Schiffer */ @@ -193,7 +193,7 @@ static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, } /** - * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg + * batadv_netlink_set_mesh_ap_isolation() - Set ap_isolation from genl msg * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute * @bat_priv: the bat priv with all the soft interface information * @@ -757,7 +757,7 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) } /** - * batadv_netlink_tp_meter_start() - Cancel a running tp_meter session + * batadv_netlink_tp_meter_cancel() - Cancel a running tp_meter session * @skb: received netlink message * @info: receiver information * diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 7ee48f916997..48102cc7490c 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2016-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Matthias Schiffer */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 0cec108b7a99..4bb76b434d07 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen */ diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 8fb2c01e7837..368cc3130e4c 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 77431e59b228..da7249448474 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index e75d4c4d11f5..805be87d55b8 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 49cbca4aa428..40f5cffde6a3 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 2ed49db6eff5..5f387786e9a7 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 87017332b567..157abe92d827 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 0d36e15589f6..2b0daf8b2bc4 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 97118efbe678..6b8181bc3122 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 74716d9ca4f6..38b0ad182584 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner */ diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index d4e10005df6c..789c851732b7 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Edo Monticelli, Antonio Quartulli */ @@ -131,7 +131,7 @@ static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min) } /** - * batadv_tp_updated_cwnd() - update the Congestion Windows + * batadv_tp_update_cwnd() - update the Congestion Windows * @tp_vars: the private data of the current TP meter session * @mss: maximum segment size of transmission * diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index 140105215aa2..f0046d366eac 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli */ diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c index 3444d9e4e90d..ec8b9519076b 100644 --- a/net/batman-adv/trace.c +++ b/net/batman-adv/trace.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Sven Eckelmann */ diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index a87547570b4e..d673ebdd0426 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Sven Eckelmann */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cd09916f97fe..f8761281aab0 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli */ diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 57192c817229..e1285904f885 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli */ diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 6a23a566cde1..253f5a33a914 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index d509d00c7a23..54f2a35653d0 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 2f96e96a5ca4..7c0b475cc22a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2020 B.A.T.M.A.N. contributors: +/* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ @@ -228,7 +228,8 @@ struct batadv_hard_iface { }; /** - * struct batadv_orig_ifinfo - B.A.T.M.A.N. IV private orig_ifinfo members + * struct batadv_orig_ifinfo_bat_iv - B.A.T.M.A.N. 
IV private orig_ifinfo + * members */ struct batadv_orig_ifinfo_bat_iv { /** diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index da7fd7c8c2dc..463bad58478b 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -381,9 +381,9 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb, hdev = hci_dev_get(req->id); if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) { struct a2mp_amp_assoc_rsp rsp; - rsp.id = req->id; memset(&rsp, 0, sizeof(rsp)); + rsp.id = req->id; if (tmp) { rsp.status = A2MP_STATUS_COLLISION_OCCURED; @@ -512,6 +512,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); + hci_dev_put(hdev); return -ENOMEM; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4ef6a54403aa..1661979b6a6e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -72,8 +72,8 @@ void bt_sock_reclassify_lock(struct sock *sk, int proto) BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, - bt_slock_key_strings[proto], &bt_slock_key[proto], - bt_key_strings[proto], &bt_lock_key[proto]); + bt_slock_key_strings[proto], &bt_slock_key[proto], + bt_key_strings[proto], &bt_lock_key[proto]); } EXPORT_SYMBOL(bt_sock_reclassify_lock); @@ -451,7 +451,7 @@ static inline __poll_t bt_accept_poll(struct sock *parent) } __poll_t bt_sock_poll(struct file *file, struct socket *sock, - poll_table *wait) + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -478,8 +478,8 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; if (sk->sk_state == BT_CONNECT || - sk->sk_state == BT_CONNECT2 || - sk->sk_state == BT_CONFIG) + sk->sk_state == BT_CONNECT2 || + sk->sk_state == BT_CONFIG) return mask; if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) @@ -508,7 +508,7 @@ int bt_sock_ioctl(struct socket *sock, 
unsigned int cmd, unsigned long arg) amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; - err = put_user(amount, (int __user *) arg); + err = put_user(amount, (int __user *)arg); break; case TIOCINQ: @@ -519,7 +519,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; release_sock(sk); - err = put_user(amount, (int __user *) arg); + err = put_user(amount, (int __user *)arg); break; default: @@ -637,7 +637,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { - seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Parent"); + seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent"); if (l->custom_seq_show) { seq_putc(seq, ' '); @@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), - bt->parent? sock_i_ino(bt->parent): 0LU); + bt->parent ? 
sock_i_ino(bt->parent) : 0LU); if (l->custom_seq_show) { seq_putc(seq, ' '); @@ -678,7 +678,7 @@ static const struct seq_operations bt_seq_ops = { int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, - int (* seq_show)(struct seq_file *, void *)) + int (*seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; @@ -694,7 +694,7 @@ void bt_procfs_cleanup(struct net *net, const char *name) #else int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, - int (* seq_show)(struct seq_file *, void *)) + int (*seq_show)(struct seq_file *, void *)) { return 0; } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 9c711f0dfae3..be2d469d6369 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -297,6 +297,9 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, struct hci_request req; int err; + if (!mgr) + return; + cp.phy_handle = hcon->handle; cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4f1cd8063e72..6ffa89e3ba0a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -203,6 +203,23 @@ static void hci_acl_create_connection(struct hci_conn *conn) BT_DBG("hcon %p", conn); + /* Many controllers disallow HCI Create Connection while it is doing + * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create + * Connection. This may cause the MGMT discovering state to become false + * without user space's request but it is okay since the MGMT Discovery + * APIs do not promise that discovery should be done forever. Instead, + * the user space monitors the status of MGMT discovering and it may + * request for discovery again when this flag becomes false. + */ + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + /* Put this connection to "pending" state so that it will be + * executed after the inquiry cancel command complete event. 
+ */ + conn->state = BT_CONNECT2; + hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); + return; + } + conn->state = BT_CONNECT; conn->out = true; conn->role = HCI_ROLE_MASTER; @@ -276,6 +293,20 @@ static void hci_add_sco(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp); } +static bool find_next_esco_param(struct hci_conn *conn, + const struct sco_param *esco_param, int size) +{ + for (; conn->attempt <= size; conn->attempt++) { + if (lmp_esco_2m_capable(conn->link) || + (esco_param[conn->attempt - 1].pkt_type & ESCO_2EV3)) + break; + BT_DBG("hcon %p skipped attempt %d, eSCO 2M not supported", + conn, conn->attempt); + } + + return conn->attempt <= size; +} + bool hci_setup_sync(struct hci_conn *conn, __u16 handle) { struct hci_dev *hdev = conn->hdev; @@ -297,13 +328,15 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle) switch (conn->setting & SCO_AIRMODE_MASK) { case SCO_AIRMODE_TRANSP: - if (conn->attempt > ARRAY_SIZE(esco_param_msbc)) + if (!find_next_esco_param(conn, esco_param_msbc, + ARRAY_SIZE(esco_param_msbc))) return false; param = &esco_param_msbc[conn->attempt - 1]; break; case SCO_AIRMODE_CVSD: if (lmp_esco_capable(conn->link)) { - if (conn->attempt > ARRAY_SIZE(esco_param_cvsd)) + if (!find_next_esco_param(conn, esco_param_cvsd, + ARRAY_SIZE(esco_param_cvsd))) return false; param = &esco_param_cvsd[conn->attempt - 1]; } else { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9d2c9a1c552f..b0d9c36acc03 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1362,8 +1362,10 @@ int hci_inquiry(void __user *arg) * cleared). If it is interrupted by a signal, return -EINTR. 
*/ if (wait_on_bit(&hdev->flags, HCI_INQUIRY, - TASK_INTERRUPTIBLE)) - return -EINTR; + TASK_INTERRUPTIBLE)) { + err = -EINTR; + goto done; + } } /* for unlimited number of responses we will use buffer with @@ -3051,12 +3053,15 @@ void hci_adv_monitors_clear(struct hci_dev *hdev) int handle; idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) - hci_free_adv_monitor(monitor); + hci_free_adv_monitor(hdev, monitor); idr_destroy(&hdev->adv_monitors_idr); } -void hci_free_adv_monitor(struct adv_monitor *monitor) +/* Frees the monitor structure and do some bookkeepings. + * This function requires the caller holds hdev->lock. + */ +void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) { struct adv_pattern *pattern; struct adv_pattern *tmp; @@ -3064,68 +3069,167 @@ void hci_free_adv_monitor(struct adv_monitor *monitor) if (!monitor) return; - list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) + list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) { + list_del(&pattern->list); kfree(pattern); + } + + if (monitor->handle) + idr_remove(&hdev->adv_monitors_idr, monitor->handle); + + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + hdev->adv_monitors_cnt--; + mgmt_adv_monitor_removed(hdev, monitor->handle); + } kfree(monitor); } -/* This function requires the caller holds hdev->lock */ -int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) +int hci_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) +{ + return mgmt_add_adv_patterns_monitor_complete(hdev, status); +} + +int hci_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status) +{ + return mgmt_remove_adv_monitor_complete(hdev, status); +} + +/* Assigns handle to a monitor, and if offloading is supported and power is on, + * also attempts to forward the request to the controller. + * Returns true if request is forwarded (result is pending), false otherwise. + * This function requires the caller holds hdev->lock. 
+ */ +bool hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + int *err) { int min, max, handle; - if (!monitor) - return -EINVAL; + *err = 0; + + if (!monitor) { + *err = -EINVAL; + return false; + } min = HCI_MIN_ADV_MONITOR_HANDLE; max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES; handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max, GFP_KERNEL); - if (handle < 0) - return handle; + if (handle < 0) { + *err = handle; + return false; + } - hdev->adv_monitors_cnt++; monitor->handle = handle; - hci_update_background_scan(hdev); + if (!hdev_is_powered(hdev)) + return false; - return 0; + switch (hci_get_adv_monitor_offload_ext(hdev)) { + case HCI_ADV_MONITOR_EXT_NONE: + hci_update_background_scan(hdev); + bt_dev_dbg(hdev, "%s add monitor status %d", hdev->name, *err); + /* Message was not forwarded to controller - not an error */ + return false; + case HCI_ADV_MONITOR_EXT_MSFT: + *err = msft_add_monitor_pattern(hdev, monitor); + bt_dev_dbg(hdev, "%s add monitor msft status %d", hdev->name, + *err); + break; + } + + return (*err == 0); } -static int free_adv_monitor(int id, void *ptr, void *data) +/* Attempts to tell the controller and free the monitor. If somehow the + * controller doesn't have a corresponding handle, remove anyway. + * Returns true if request is forwarded (result is pending), false otherwise. + * This function requires the caller holds hdev->lock. 
+ */ +static bool hci_remove_adv_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor, + u16 handle, int *err) { - struct hci_dev *hdev = data; - struct adv_monitor *monitor = ptr; + *err = 0; - idr_remove(&hdev->adv_monitors_idr, monitor->handle); - hci_free_adv_monitor(monitor); - hdev->adv_monitors_cnt--; + switch (hci_get_adv_monitor_offload_ext(hdev)) { + case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */ + goto free_monitor; + case HCI_ADV_MONITOR_EXT_MSFT: + *err = msft_remove_monitor(hdev, monitor, handle); + break; + } - return 0; + /* In case no matching handle registered, just free the monitor */ + if (*err == -ENOENT) + goto free_monitor; + + return (*err == 0); + +free_monitor: + if (*err == -ENOENT) + bt_dev_warn(hdev, "Removing monitor with no matching handle %d", + monitor->handle); + hci_free_adv_monitor(hdev, monitor); + + *err = 0; + return false; } -/* This function requires the caller holds hdev->lock */ -int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle) +/* Returns true if request is forwarded (result is pending), false otherwise. + * This function requires the caller holds hdev->lock. + */ +bool hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle, int *err) +{ + struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle); + bool pending; + + if (!monitor) { + *err = -EINVAL; + return false; + } + + pending = hci_remove_adv_monitor(hdev, monitor, handle, err); + if (!*err && !pending) + hci_update_background_scan(hdev); + + bt_dev_dbg(hdev, "%s remove monitor handle %d, status %d, %spending", + hdev->name, handle, *err, pending ? "" : "not "); + + return pending; +} + +/* Returns true if request is forwarded (result is pending), false otherwise. + * This function requires the caller holds hdev->lock. 
+ */ +bool hci_remove_all_adv_monitor(struct hci_dev *hdev, int *err) { struct adv_monitor *monitor; + int idr_next_id = 0; + bool pending = false; + bool update = false; - if (handle) { - monitor = idr_find(&hdev->adv_monitors_idr, handle); + *err = 0; + + while (!*err && !pending) { + monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id); if (!monitor) - return -ENOENT; + break; - idr_remove(&hdev->adv_monitors_idr, monitor->handle); - hci_free_adv_monitor(monitor); - hdev->adv_monitors_cnt--; - } else { - /* Remove all monitors if handle is 0. */ - idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev); + pending = hci_remove_adv_monitor(hdev, monitor, 0, err); + + if (!*err && !pending) + update = true; } - hci_update_background_scan(hdev); + if (update) + hci_update_background_scan(hdev); - return 0; + bt_dev_dbg(hdev, "%s remove all monitors status %d, %spending", + hdev->name, *err, pending ? "" : "not "); + + return pending; } /* This function requires the caller holds hdev->lock */ @@ -3134,6 +3238,14 @@ bool hci_is_adv_monitoring(struct hci_dev *hdev) return !idr_is_empty(&hdev->adv_monitors_idr); } +int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev) +{ + if (msft_monitor_supported(hdev)) + return HCI_ADV_MONITOR_EXT_MSFT; + + return HCI_ADV_MONITOR_EXT_NONE; +} + struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list, bdaddr_t *bdaddr, u8 type) { @@ -3566,7 +3678,8 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, } /* Suspend notifier should only act on events when powered. 
*/ - if (!hdev_is_powered(hdev)) + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) goto done; if (action == PM_SUSPEND_PREPARE) { @@ -3827,10 +3940,12 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - hdev->suspend_notifier.notifier_call = hci_suspend_notifier; - error = register_pm_notifier(&hdev->suspend_notifier); - if (error) - goto err_wqueue; + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) + goto err_wqueue; + } queue_work(hdev->req_workqueue, &hdev->power_on); @@ -3865,9 +3980,11 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); - hci_suspend_clear_tasks(hdev); - unregister_pm_notifier(&hdev->suspend_notifier); - cancel_work_sync(&hdev->suspend_prepare); + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hci_suspend_clear_tasks(hdev); + unregister_pm_notifier(&hdev->suspend_notifier); + cancel_work_sync(&hdev->suspend_prepare); + } hci_dev_do_close(hdev); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 4626e0289a97..1a0ab58bfad0 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -237,8 +237,8 @@ static int conn_info_min_age_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, - conn_info_min_age_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get, + conn_info_min_age_set, "%llu\n"); static int conn_info_max_age_set(void *data, u64 val) { @@ -265,8 +265,8 @@ static int conn_info_max_age_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, - conn_info_max_age_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get, + conn_info_max_age_set, "%llu\n"); 
static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -419,8 +419,8 @@ static int voice_setting_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(voice_setting_fops, voice_setting_get, - NULL, "0x%4.4llx\n"); +DEFINE_DEBUGFS_ATTRIBUTE(voice_setting_fops, voice_setting_get, + NULL, "0x%4.4llx\n"); static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -476,9 +476,9 @@ static int min_encrypt_key_size_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops, - min_encrypt_key_size_get, - min_encrypt_key_size_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(min_encrypt_key_size_fops, + min_encrypt_key_size_get, + min_encrypt_key_size_set, "%llu\n"); static int auto_accept_delay_get(void *data, u64 *val) { @@ -491,8 +491,8 @@ static int auto_accept_delay_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, - auto_accept_delay_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(auto_accept_delay_fops, auto_accept_delay_get, + auto_accept_delay_set, "%llu\n"); static ssize_t force_bredr_smp_read(struct file *file, char __user *user_buf, @@ -558,8 +558,8 @@ static int idle_timeout_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, - idle_timeout_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(idle_timeout_fops, idle_timeout_get, + idle_timeout_set, "%llu\n"); static int sniff_min_interval_set(void *data, u64 val) { @@ -586,8 +586,8 @@ static int sniff_min_interval_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, - sniff_min_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(sniff_min_interval_fops, sniff_min_interval_get, + sniff_min_interval_set, "%llu\n"); static int sniff_max_interval_set(void *data, u64 val) { @@ -614,8 +614,8 @@ static int sniff_max_interval_get(void *data, 
u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, - sniff_max_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get, + sniff_max_interval_set, "%llu\n"); void hci_debugfs_create_bredr(struct hci_dev *hdev) { @@ -706,8 +706,8 @@ static int rpa_timeout_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, - rpa_timeout_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(rpa_timeout_fops, rpa_timeout_get, + rpa_timeout_set, "%llu\n"); static int random_address_show(struct seq_file *f, void *p) { @@ -869,8 +869,8 @@ static int conn_min_interval_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, - conn_min_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(conn_min_interval_fops, conn_min_interval_get, + conn_min_interval_set, "%llu\n"); static int conn_max_interval_set(void *data, u64 val) { @@ -897,8 +897,8 @@ static int conn_max_interval_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, - conn_max_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(conn_max_interval_fops, conn_max_interval_get, + conn_max_interval_set, "%llu\n"); static int conn_latency_set(void *data, u64 val) { @@ -925,8 +925,8 @@ static int conn_latency_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(conn_latency_fops, conn_latency_get, - conn_latency_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(conn_latency_fops, conn_latency_get, + conn_latency_set, "%llu\n"); static int supervision_timeout_set(void *data, u64 val) { @@ -953,8 +953,8 @@ static int supervision_timeout_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, - supervision_timeout_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(supervision_timeout_fops, supervision_timeout_get, + supervision_timeout_set, "%llu\n"); static int 
adv_channel_map_set(void *data, u64 val) { @@ -981,8 +981,8 @@ static int adv_channel_map_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, - adv_channel_map_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(adv_channel_map_fops, adv_channel_map_get, + adv_channel_map_set, "%llu\n"); static int adv_min_interval_set(void *data, u64 val) { @@ -1009,8 +1009,8 @@ static int adv_min_interval_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, - adv_min_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(adv_min_interval_fops, adv_min_interval_get, + adv_min_interval_set, "%llu\n"); static int adv_max_interval_set(void *data, u64 val) { @@ -1037,8 +1037,8 @@ static int adv_max_interval_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, - adv_max_interval_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(adv_max_interval_fops, adv_max_interval_get, + adv_max_interval_set, "%llu\n"); static int min_key_size_set(void *data, u64 val) { @@ -1065,8 +1065,8 @@ static int min_key_size_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(min_key_size_fops, min_key_size_get, - min_key_size_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(min_key_size_fops, min_key_size_get, + min_key_size_set, "%llu\n"); static int max_key_size_set(void *data, u64 val) { @@ -1093,8 +1093,8 @@ static int max_key_size_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(max_key_size_fops, max_key_size_get, - max_key_size_set, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(max_key_size_fops, max_key_size_get, + max_key_size_set, "%llu\n"); static int auth_payload_timeout_set(void *data, u64 val) { @@ -1121,9 +1121,9 @@ static int auth_payload_timeout_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(auth_payload_timeout_fops, - auth_payload_timeout_get, - auth_payload_timeout_set, "%llu\n"); 
+DEFINE_DEBUGFS_ATTRIBUTE(auth_payload_timeout_fops, + auth_payload_timeout_get, + auth_payload_timeout_set, "%llu\n"); static ssize_t force_no_mitm_read(struct file *file, char __user *user_buf, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 71bffd745472..e55976db4403 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -29,6 +29,7 @@ #include "smp.h" #include "hci_request.h" +#include "msft.h" #define HCI_REQ_DONE 0 #define HCI_REQ_PEND 1 @@ -404,13 +405,18 @@ static void cancel_interleave_scan(struct hci_dev *hdev) */ static bool __hci_update_interleaved_scan(struct hci_dev *hdev) { - /* If there is at least one ADV monitors and one pending LE connection - * or one device to be scanned for, we should alternate between - * allowlist scan and one without any filters to save power. + /* Do interleaved scan only if all of the following are true: + * - There is at least one ADV monitor + * - At least one pending LE connection or one device to be scanned for + * - Monitor offloading is not supported + * If so, we should alternate between allowlist scan and one without + * any filters to save power. 
*/ bool use_interleaving = hci_is_adv_monitoring(hdev) && !(list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)); + list_empty(&hdev->pend_le_reports)) && + hci_get_adv_monitor_offload_ext(hdev) == + HCI_ADV_MONITOR_EXT_NONE; bool is_interleaving = is_interleave_scanning(hdev); if (use_interleaving && !is_interleaving) { @@ -899,14 +905,11 @@ static u8 update_white_list(struct hci_request *req) /* Use the allowlist unless the following conditions are all true: * - We are not currently suspending - * - There are 1 or more ADV monitors registered + * - There are 1 or more ADV monitors registered and it's not offloaded * - Interleaved scanning is not currently using the allowlist - * - * Once the controller offloading of advertisement monitor is in place, - * the above condition should include the support of MSFT extension - * support. */ if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && + hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) return 0x00; @@ -1087,6 +1090,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req) if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; + + set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; @@ -1170,19 +1175,6 @@ static void hci_req_set_event_filter(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_req_config_le_suspend_scan(struct hci_request *req) -{ - /* Before changing params disable scan if enabled */ - if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req, false); - - /* Configure params and enable scanning */ - hci_req_add_le_passive_scan(req); - - /* Block suspend notifier on response */ - set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks); -} - static void 
cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1245,12 +1237,37 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) { bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, status); - if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || - test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + if (test_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); + clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); + wake_up(&hdev->suspend_wait_q); + } + + if (test_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks)) { + clear_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks); wake_up(&hdev->suspend_wait_q); } } +static void hci_req_add_set_adv_filter_enable(struct hci_request *req, + bool enable) +{ + struct hci_dev *hdev = req->hdev; + + switch (hci_get_adv_monitor_offload_ext(hdev)) { + case HCI_ADV_MONITOR_EXT_MSFT: + msft_req_add_set_filter_enable(req, enable); + break; + default: + return; + } + + /* No need to block when enabling since it's on resume path */ + if (hdev->suspended && !enable) + set_bit(SUSPEND_SET_ADV_FILTER, hdev->suspend_tasks); +} + /* Call with hci_dev_lock */ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) { @@ -1308,6 +1325,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hci_req_add_le_scan_disable(&req, false); } + /* Disable advertisement filters */ + hci_req_add_set_adv_filter_enable(&req, false); + /* Mark task needing completion */ set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); @@ -1336,7 +1356,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Enable event filter for paired devices */ hci_req_set_event_filter(&req); /* Enable passive scan at lower duty cycle */ - hci_req_config_le_suspend_scan(&req); + 
__hci_update_background_scan(&req); /* Pause scan changes again. */ hdev->scanning_paused = true; hci_req_run(&req, suspend_req_complete); @@ -1346,7 +1366,9 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hci_req_clear_event_filter(&req); /* Reset passive/background scanning to normal */ - hci_req_config_le_suspend_scan(&req); + __hci_update_background_scan(&req); + /* Enable all of the advertisement filters */ + hci_req_add_set_adv_filter_enable(&req, true); /* Unpause directed advertising */ hdev->advertising_paused = false; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 17b87b57a175..72c2f5226d67 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4519,6 +4519,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, } goto done; + case L2CAP_CONF_UNKNOWN: case L2CAP_CONF_UNACCEPT: if (chan->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { char req[64]; @@ -8276,10 +8277,73 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) mutex_unlock(&conn->chan_lock); } +/* Append fragment into frame respecting the maximum len of rx_skb */ +static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, + u16 len) +{ + if (!conn->rx_skb) { + /* Allocate skb for the complete frame (with header) */ + conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL); + if (!conn->rx_skb) + return -ENOMEM; + /* Init rx_len */ + conn->rx_len = len; + } + + /* Copy as much as the rx_skb can hold */ + len = min_t(u16, len, skb->len); + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, len), len); + skb_pull(skb, len); + conn->rx_len -= len; + + return len; +} + +static int l2cap_recv_len(struct l2cap_conn *conn, struct sk_buff *skb) +{ + struct sk_buff *rx_skb; + int len; + + /* Append just enough to complete the header */ + len = l2cap_recv_frag(conn, skb, L2CAP_LEN_SIZE - conn->rx_skb->len); + + /* If header could not be read just continue */ + if (len < 0 || 
conn->rx_skb->len < L2CAP_LEN_SIZE) + return len; + + rx_skb = conn->rx_skb; + len = get_unaligned_le16(rx_skb->data); + + /* Check if rx_skb has enough space to received all fragments */ + if (len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE) <= skb_tailroom(rx_skb)) { + /* Update expected len */ + conn->rx_len = len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE); + return L2CAP_LEN_SIZE; + } + + /* Reset conn->rx_skb since it will need to be reallocated in order to + * fit all fragments. + */ + conn->rx_skb = NULL; + + /* Reallocates rx_skb using the exact expected length */ + len = l2cap_recv_frag(conn, rx_skb, + len + (L2CAP_HDR_SIZE - L2CAP_LEN_SIZE)); + kfree_skb(rx_skb); + + return len; +} + +static void l2cap_recv_reset(struct l2cap_conn *conn) +{ + kfree_skb(conn->rx_skb); + conn->rx_skb = NULL; + conn->rx_len = 0; +} + void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { struct l2cap_conn *conn = hcon->l2cap_data; - struct l2cap_hdr *hdr; int len; /* For AMP controller do not create l2cap conn */ @@ -8298,23 +8362,23 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) case ACL_START: case ACL_START_NO_FLUSH: case ACL_COMPLETE: - if (conn->rx_len) { + if (conn->rx_skb) { BT_ERR("Unexpected start frame (len %d)", skb->len); - kfree_skb(conn->rx_skb); - conn->rx_skb = NULL; - conn->rx_len = 0; + l2cap_recv_reset(conn); l2cap_conn_unreliable(conn, ECOMM); } - /* Start fragment always begin with Basic L2CAP header */ - if (skb->len < L2CAP_HDR_SIZE) { - BT_ERR("Frame is too short (len %d)", skb->len); - l2cap_conn_unreliable(conn, ECOMM); - goto drop; + /* Start fragment may not contain the L2CAP length so just + * copy the initial byte when that happens and use conn->mtu as + * expected length. 
+ */ + if (skb->len < L2CAP_LEN_SIZE) { + if (l2cap_recv_frag(conn, skb, conn->mtu) < 0) + goto drop; + return; } - hdr = (struct l2cap_hdr *) skb->data; - len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE; + len = get_unaligned_le16(skb->data) + L2CAP_HDR_SIZE; if (len == skb->len) { /* Complete frame received */ @@ -8331,38 +8395,43 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) goto drop; } - /* Allocate skb for the complete frame (with header) */ - conn->rx_skb = bt_skb_alloc(len, GFP_KERNEL); - if (!conn->rx_skb) + /* Append fragment into frame (with header) */ + if (l2cap_recv_frag(conn, skb, len) < 0) goto drop; - skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); - conn->rx_len = len - skb->len; break; case ACL_CONT: BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); - if (!conn->rx_len) { + if (!conn->rx_skb) { BT_ERR("Unexpected continuation frame (len %d)", skb->len); l2cap_conn_unreliable(conn, ECOMM); goto drop; } + /* Complete the L2CAP length if it has not been read */ + if (conn->rx_skb->len < L2CAP_LEN_SIZE) { + if (l2cap_recv_len(conn, skb) < 0) { + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + /* Header still could not be read just continue */ + if (conn->rx_skb->len < L2CAP_LEN_SIZE) + return; + } + if (skb->len > conn->rx_len) { BT_ERR("Fragment is too long (len %d, expected %d)", skb->len, conn->rx_len); - kfree_skb(conn->rx_skb); - conn->rx_skb = NULL; - conn->rx_len = 0; + l2cap_recv_reset(conn); l2cap_conn_unreliable(conn, ECOMM); goto drop; } - skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); - conn->rx_len -= skb->len; + /* Append fragment into frame (with header) */ + l2cap_recv_frag(conn, skb, skb->len); if (!conn->rx_len) { /* Complete frame received. 
l2cap_recv_frame diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fa0f7a4a1d2f..74971b4bd457 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -124,6 +124,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADV_MONITOR, MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_OP_ADD_EXT_ADV_DATA, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, }; static const u16 mgmt_events[] = { @@ -4166,14 +4167,24 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, - u16 handle) +void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) { - struct mgmt_ev_adv_monitor_added ev; + struct mgmt_ev_adv_monitor_removed ev; + struct mgmt_pending_cmd *cmd; + struct sock *sk_skip = NULL; + struct mgmt_cp_remove_adv_monitor *cp; + + cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); + if (cmd) { + cp = cmd->param; + + if (cp->monitor_handle) + sk_skip = cmd->sk; + } ev.monitor_handle = cpu_to_le16(handle); - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ -4184,6 +4195,7 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, int handle, err; size_t rp_size = 0; __u32 supported = 0; + __u32 enabled = 0; __u16 num_handles = 0; __u16 handles[HCI_MAX_ADV_MONITOR_NUM_HANDLES]; @@ -4191,12 +4203,11 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR) + if (msft_monitor_supported(hdev)) supported |= MGMT_ADV_MONITOR_FEATURE_MASK_OR_PATTERNS; - idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) { + idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle) handles[num_handles++] = monitor->handle; - } 
hci_dev_unlock(hdev); @@ -4205,11 +4216,11 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, if (!rp) return -ENOMEM; - /* Once controller-based monitoring is in place, the enabled_features - * should reflect the use. - */ + /* All supported features are currently enabled */ + enabled = supported; + rp->supported_features = cpu_to_le32(supported); - rp->enabled_features = 0; + rp->enabled_features = cpu_to_le32(enabled); rp->max_num_handles = cpu_to_le16(HCI_MAX_ADV_MONITOR_NUM_HANDLES); rp->max_num_patterns = HCI_MAX_ADV_MONITOR_NUM_PATTERNS; rp->num_handles = cpu_to_le16(num_handles); @@ -4225,105 +4236,267 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, return err; } +int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status) +{ + struct mgmt_rp_add_adv_patterns_monitor rp; + struct mgmt_pending_cmd *cmd; + struct adv_monitor *monitor; + int err = 0; + + hci_dev_lock(hdev); + + cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev); + if (!cmd) { + cmd = pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev); + if (!cmd) + goto done; + } + + monitor = cmd->user_data; + rp.monitor_handle = cpu_to_le16(monitor->handle); + + if (!status) { + mgmt_adv_monitor_added(cmd->sk, hdev, monitor->handle); + hdev->adv_monitors_cnt++; + if (monitor->state == ADV_MONITOR_STATE_NOT_REGISTERED) + monitor->state = ADV_MONITOR_STATE_REGISTERED; + hci_update_background_scan(hdev); + } + + err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); + mgmt_pending_remove(cmd); + +done: + hci_dev_unlock(hdev); + bt_dev_dbg(hdev, "add monitor %d complete, status %d", + rp.monitor_handle, status); + + return err; +} + +static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, + struct adv_monitor *m, u8 status, + void *data, u16 len, u16 op) +{ + struct mgmt_rp_add_adv_patterns_monitor rp; + struct mgmt_pending_cmd *cmd; + int err; + bool pending; + + 
hci_dev_lock(hdev); + + if (status) + goto unlock; + + if (pending_find(MGMT_OP_SET_LE, hdev) || + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || + pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + status = MGMT_STATUS_BUSY; + goto unlock; + } + + cmd = mgmt_pending_add(sk, op, hdev, data, len); + if (!cmd) { + status = MGMT_STATUS_NO_RESOURCES; + goto unlock; + } + + cmd->user_data = m; + pending = hci_add_adv_monitor(hdev, m, &err); + if (err) { + if (err == -ENOSPC || err == -ENOMEM) + status = MGMT_STATUS_NO_RESOURCES; + else if (err == -EINVAL) + status = MGMT_STATUS_INVALID_PARAMS; + else + status = MGMT_STATUS_FAILED; + + mgmt_pending_remove(cmd); + goto unlock; + } + + if (!pending) { + mgmt_pending_remove(cmd); + rp.monitor_handle = cpu_to_le16(m->handle); + mgmt_adv_monitor_added(sk, hdev, m->handle); + m->state = ADV_MONITOR_STATE_REGISTERED; + hdev->adv_monitors_cnt++; + + hci_dev_unlock(hdev); + return mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_SUCCESS, + &rp, sizeof(rp)); + } + + hci_dev_unlock(hdev); + + return 0; + +unlock: + hci_free_adv_monitor(hdev, m); + hci_dev_unlock(hdev); + return mgmt_cmd_status(sk, hdev->id, op, status); +} + +static void parse_adv_monitor_rssi(struct adv_monitor *m, + struct mgmt_adv_rssi_thresholds *rssi) +{ + if (rssi) { + m->rssi.low_threshold = rssi->low_threshold; + m->rssi.low_threshold_timeout = + __le16_to_cpu(rssi->low_threshold_timeout); + m->rssi.high_threshold = rssi->high_threshold; + m->rssi.high_threshold_timeout = + __le16_to_cpu(rssi->high_threshold_timeout); + m->rssi.sampling_period = rssi->sampling_period; + } else { + /* Default values. These numbers are the least constricting + * parameters for MSFT API to work, so it behaves as if there + * are no rssi parameter to consider. May need to be changed + * if other API are to be supported. 
+ */ + m->rssi.low_threshold = -127; + m->rssi.low_threshold_timeout = 60; + m->rssi.high_threshold = -127; + m->rssi.high_threshold_timeout = 0; + m->rssi.sampling_period = 0; + } +} + +static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, + struct mgmt_adv_pattern *patterns) +{ + u8 offset = 0, length = 0; + struct adv_pattern *p = NULL; + int i; + + for (i = 0; i < pattern_count; i++) { + offset = patterns[i].offset; + length = patterns[i].length; + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) + return MGMT_STATUS_INVALID_PARAMS; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return MGMT_STATUS_NO_RESOURCES; + + p->ad_type = patterns[i].ad_type; + p->offset = patterns[i].offset; + p->length = patterns[i].length; + memcpy(p->value, patterns[i].value, p->length); + + INIT_LIST_HEAD(&p->list); + list_add(&p->list, &m->patterns); + } + + return MGMT_STATUS_SUCCESS; +} + static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_adv_patterns_monitor *cp = data; - struct mgmt_rp_add_adv_patterns_monitor rp; struct adv_monitor *m = NULL; - struct adv_pattern *p = NULL; - unsigned int mp_cnt = 0, prev_adv_monitors_cnt; - __u8 cp_ofst = 0, cp_len = 0; - int err, i; + u8 status = MGMT_STATUS_SUCCESS; + size_t expected_size = sizeof(*cp); BT_DBG("request for %s", hdev->name); - if (len <= sizeof(*cp) || cp->pattern_count == 0) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_INVALID_PARAMS); - goto failed; + if (len <= sizeof(*cp)) { + status = MGMT_STATUS_INVALID_PARAMS; + goto done; } - m = kmalloc(sizeof(*m), GFP_KERNEL); + expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern); + if (len != expected_size) { + status = MGMT_STATUS_INVALID_PARAMS; + goto done; + } + + m = kzalloc(sizeof(*m), GFP_KERNEL); if (!m) { - err = -ENOMEM; - goto failed; + status = 
MGMT_STATUS_NO_RESOURCES; + goto done; } INIT_LIST_HEAD(&m->patterns); - m->active = false; - for (i = 0; i < cp->pattern_count; i++) { - if (++mp_cnt > HCI_MAX_ADV_MONITOR_NUM_PATTERNS) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_INVALID_PARAMS); - goto failed; - } + parse_adv_monitor_rssi(m, NULL); + status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns); - cp_ofst = cp->patterns[i].offset; - cp_len = cp->patterns[i].length; - if (cp_ofst >= HCI_MAX_AD_LENGTH || - cp_len > HCI_MAX_AD_LENGTH || - (cp_ofst + cp_len) > HCI_MAX_AD_LENGTH) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_INVALID_PARAMS); - goto failed; - } +done: + return __add_adv_patterns_monitor(sk, hdev, m, status, data, len, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR); +} - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (!p) { - err = -ENOMEM; - goto failed; - } +static int add_adv_patterns_monitor_rssi(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_add_adv_patterns_monitor_rssi *cp = data; + struct adv_monitor *m = NULL; + u8 status = MGMT_STATUS_SUCCESS; + size_t expected_size = sizeof(*cp); - p->ad_type = cp->patterns[i].ad_type; - p->offset = cp->patterns[i].offset; - p->length = cp->patterns[i].length; - memcpy(p->value, cp->patterns[i].value, p->length); + BT_DBG("request for %s", hdev->name); - INIT_LIST_HEAD(&p->list); - list_add(&p->list, &m->patterns); + if (len <= sizeof(*cp)) { + status = MGMT_STATUS_INVALID_PARAMS; + goto done; } - if (mp_cnt != cp->pattern_count) { - err = mgmt_cmd_status(sk, hdev->id, - MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_INVALID_PARAMS); - goto failed; + expected_size += cp->pattern_count * sizeof(struct mgmt_adv_pattern); + if (len != expected_size) { + status = MGMT_STATUS_INVALID_PARAMS; + goto done; } - hci_dev_lock(hdev); + m = kzalloc(sizeof(*m), GFP_KERNEL); + if (!m) { + status = MGMT_STATUS_NO_RESOURCES; + goto done; + } 
- prev_adv_monitors_cnt = hdev->adv_monitors_cnt; + INIT_LIST_HEAD(&m->patterns); - err = hci_add_adv_monitor(hdev, m); - if (err) { - if (err == -ENOSPC) { - mgmt_cmd_status(sk, hdev->id, - MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_NO_RESOURCES); - } - goto unlock; - } + parse_adv_monitor_rssi(m, &cp->rssi); + status = parse_adv_monitor_pattern(m, cp->pattern_count, cp->patterns); - if (hdev->adv_monitors_cnt > prev_adv_monitors_cnt) - mgmt_adv_monitor_added(sk, hdev, m->handle); +done: + return __add_adv_patterns_monitor(sk, hdev, m, status, data, len, + MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI); +} - hci_dev_unlock(hdev); +int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status) +{ + struct mgmt_rp_remove_adv_monitor rp; + struct mgmt_cp_remove_adv_monitor *cp; + struct mgmt_pending_cmd *cmd; + int err = 0; - rp.monitor_handle = cpu_to_le16(m->handle); + hci_dev_lock(hdev); - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_ADV_PATTERNS_MONITOR, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); + if (!cmd) + goto done; -unlock: + cp = cmd->param; + rp.monitor_handle = cp->monitor_handle; + + if (!status) + hci_update_background_scan(hdev); + + err = mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status), &rp, sizeof(rp)); + mgmt_pending_remove(cmd); + +done: hci_dev_unlock(hdev); + bt_dev_dbg(hdev, "remove monitor %d complete, status %d", + rp.monitor_handle, status); -failed: - hci_free_adv_monitor(m); return err; } @@ -4332,37 +4505,64 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_remove_adv_monitor *cp = data; struct mgmt_rp_remove_adv_monitor rp; - unsigned int prev_adv_monitors_cnt; - u16 handle; - int err; + struct mgmt_pending_cmd *cmd; + u16 handle = __le16_to_cpu(cp->monitor_handle); + int err, status; + bool pending; BT_DBG("request for %s", hdev->name); + rp.monitor_handle = cp->monitor_handle; hci_dev_lock(hdev); - handle = 
__le16_to_cpu(cp->monitor_handle); - prev_adv_monitors_cnt = hdev->adv_monitors_cnt; + if (pending_find(MGMT_OP_SET_LE, hdev) || + pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { + status = MGMT_STATUS_BUSY; + goto unlock; + } - err = hci_remove_adv_monitor(hdev, handle); - if (err == -ENOENT) { - err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, - MGMT_STATUS_INVALID_INDEX); + cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + if (!cmd) { + status = MGMT_STATUS_NO_RESOURCES; goto unlock; } - if (hdev->adv_monitors_cnt < prev_adv_monitors_cnt) - mgmt_adv_monitor_removed(sk, hdev, handle); + if (handle) + pending = hci_remove_single_adv_monitor(hdev, handle, &err); + else + pending = hci_remove_all_adv_monitor(hdev, &err); - hci_dev_unlock(hdev); + if (err) { + mgmt_pending_remove(cmd); - rp.monitor_handle = cp->monitor_handle; + if (err == -ENOENT) + status = MGMT_STATUS_INVALID_INDEX; + else + status = MGMT_STATUS_FAILED; - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, - MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + goto unlock; + } + + /* monitor can be removed without forwarding request to controller */ + if (!pending) { + mgmt_pending_remove(cmd); + hci_dev_unlock(hdev); + + return mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_REMOVE_ADV_MONITOR, + MGMT_STATUS_SUCCESS, + &rp, sizeof(rp)); + } + + hci_dev_unlock(hdev); + return 0; unlock: hci_dev_unlock(hdev); - return err; + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADV_MONITOR, + status); } static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, @@ -4798,6 +4998,14 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + if (hdev->discovery_paused) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_START_SERVICE_DISCOVERY, + MGMT_STATUS_BUSY, &cp->type, + sizeof(cp->type)); + 
goto failed; + } + uuid_count = __le16_to_cpu(cp->uuid_count); if (uuid_count > max_uuid_count) { bt_dev_err(hdev, "service_discovery: too big uuid_count value %u", @@ -8234,6 +8442,9 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_VAR_LEN }, { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, + { add_adv_patterns_monitor_rssi, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + HCI_MGMT_VAR_LEN }, }; void mgmt_index_added(struct hci_dev *hdev) diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index 4b39534a14a1..47b104f318e9 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -5,9 +5,16 @@ #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> +#include <net/bluetooth/mgmt.h> +#include "hci_request.h" +#include "mgmt_util.h" #include "msft.h" +#define MSFT_RSSI_THRESHOLD_VALUE_MIN -127 +#define MSFT_RSSI_THRESHOLD_VALUE_MAX 20 +#define MSFT_RSSI_LOW_TIMEOUT_MAX 0x3C + #define MSFT_OP_READ_SUPPORTED_FEATURES 0x00 struct msft_cp_read_supported_features { __u8 sub_opcode; @@ -21,12 +28,83 @@ struct msft_rp_read_supported_features { __u8 evt_prefix[]; } __packed; +#define MSFT_OP_LE_MONITOR_ADVERTISEMENT 0x03 +#define MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN 0x01 +struct msft_le_monitor_advertisement_pattern { + __u8 length; + __u8 data_type; + __u8 start_byte; + __u8 pattern[0]; +}; + +struct msft_le_monitor_advertisement_pattern_data { + __u8 count; + __u8 data[0]; +}; + +struct msft_cp_le_monitor_advertisement { + __u8 sub_opcode; + __s8 rssi_high; + __s8 rssi_low; + __u8 rssi_low_interval; + __u8 rssi_sampling_period; + __u8 cond_type; + __u8 data[0]; +} __packed; + +struct msft_rp_le_monitor_advertisement { + __u8 status; + __u8 sub_opcode; + __u8 handle; +} __packed; + +#define MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT 0x04 +struct msft_cp_le_cancel_monitor_advertisement { + __u8 sub_opcode; + __u8 handle; +} __packed; + +struct msft_rp_le_cancel_monitor_advertisement { + __u8 status; + __u8 
sub_opcode; +} __packed; + +#define MSFT_OP_LE_SET_ADVERTISEMENT_FILTER_ENABLE 0x05 +struct msft_cp_le_set_advertisement_filter_enable { + __u8 sub_opcode; + __u8 enable; +} __packed; + +struct msft_rp_le_set_advertisement_filter_enable { + __u8 status; + __u8 sub_opcode; +} __packed; + +struct msft_monitor_advertisement_handle_data { + __u8 msft_handle; + __u16 mgmt_handle; + struct list_head list; +}; + struct msft_data { __u64 features; __u8 evt_prefix_len; __u8 *evt_prefix; + struct list_head handle_map; + __u16 pending_add_handle; + __u16 pending_remove_handle; + __u8 reregistering; + __u8 filter_enabled; }; +static int __msft_add_monitor_pattern(struct hci_dev *hdev, + struct adv_monitor *monitor); + +bool msft_monitor_supported(struct hci_dev *hdev) +{ + return !!(msft_get_features(hdev) & MSFT_FEATURE_MASK_LE_ADV_MONITOR); +} + static bool read_supported_features(struct hci_dev *hdev, struct msft_data *msft) { @@ -72,6 +150,35 @@ failed: return false; } +/* This function requires the caller holds hdev->lock */ +static void reregister_monitor_on_restart(struct hci_dev *hdev, int handle) +{ + struct adv_monitor *monitor; + struct msft_data *msft = hdev->msft_data; + int err; + + while (1) { + monitor = idr_get_next(&hdev->adv_monitors_idr, &handle); + if (!monitor) { + /* All monitors have been reregistered */ + msft->reregistering = false; + hci_update_background_scan(hdev); + return; + } + + msft->pending_add_handle = (u16)handle; + err = __msft_add_monitor_pattern(hdev, monitor); + + /* If success, we return and wait for monitor added callback */ + if (!err) + return; + + /* Otherwise remove the monitor and keep registering */ + hci_free_adv_monitor(hdev, monitor); + handle++; + } +} + void msft_do_open(struct hci_dev *hdev) { struct msft_data *msft; @@ -90,12 +197,21 @@ void msft_do_open(struct hci_dev *hdev) return; } + INIT_LIST_HEAD(&msft->handle_map); hdev->msft_data = msft; + + if (msft_monitor_supported(hdev)) { + msft->reregistering = true; + 
msft_set_filter_enable(hdev, true); + reregister_monitor_on_restart(hdev, 0); + } } void msft_do_close(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; + struct msft_monitor_advertisement_handle_data *handle_data, *tmp; + struct adv_monitor *monitor; if (!msft) return; @@ -104,6 +220,17 @@ void msft_do_close(struct hci_dev *hdev) hdev->msft_data = NULL; + list_for_each_entry_safe(handle_data, tmp, &msft->handle_map, list) { + monitor = idr_find(&hdev->adv_monitors_idr, + handle_data->mgmt_handle); + + if (monitor && monitor->state == ADV_MONITOR_STATE_OFFLOADED) + monitor->state = ADV_MONITOR_STATE_REGISTERED; + + list_del(&handle_data->list); + kfree(handle_data); + } + kfree(msft->evt_prefix); kfree(msft); } @@ -145,5 +272,336 @@ __u64 msft_get_features(struct hci_dev *hdev) { struct msft_data *msft = hdev->msft_data; - return msft ? msft->features : 0; + return msft ? msft->features : 0; +} + +/* is_mgmt = true matches the handle exposed to userspace via mgmt. + * is_mgmt = false matches the handle used by the msft controller. 
+ * This function requires the caller holds hdev->lock + */ +static struct msft_monitor_advertisement_handle_data *msft_find_handle_data + (struct hci_dev *hdev, u16 handle, bool is_mgmt) +{ + struct msft_monitor_advertisement_handle_data *entry; + struct msft_data *msft = hdev->msft_data; + + list_for_each_entry(entry, &msft->handle_map, list) { + if (is_mgmt && entry->mgmt_handle == handle) + return entry; + if (!is_mgmt && entry->msft_handle == handle) + return entry; + } + + return NULL; +} + +static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev, + u8 status, u16 opcode, + struct sk_buff *skb) +{ + struct msft_rp_le_monitor_advertisement *rp; + struct adv_monitor *monitor; + struct msft_monitor_advertisement_handle_data *handle_data; + struct msft_data *msft = hdev->msft_data; + + hci_dev_lock(hdev); + + monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle); + if (!monitor) { + bt_dev_err(hdev, "msft add advmon: monitor %d is not found!", + msft->pending_add_handle); + status = HCI_ERROR_UNSPECIFIED; + goto unlock; + } + + if (status) + goto unlock; + + rp = (struct msft_rp_le_monitor_advertisement *)skb->data; + if (skb->len < sizeof(*rp)) { + status = HCI_ERROR_UNSPECIFIED; + goto unlock; + } + + handle_data = kmalloc(sizeof(*handle_data), GFP_KERNEL); + if (!handle_data) { + status = HCI_ERROR_UNSPECIFIED; + goto unlock; + } + + handle_data->mgmt_handle = monitor->handle; + handle_data->msft_handle = rp->handle; + INIT_LIST_HEAD(&handle_data->list); + list_add(&handle_data->list, &msft->handle_map); + + monitor->state = ADV_MONITOR_STATE_OFFLOADED; + +unlock: + if (status && monitor) + hci_free_adv_monitor(hdev, monitor); + + /* If in restart/reregister sequence, keep registering. 
*/ + if (msft->reregistering) + reregister_monitor_on_restart(hdev, + msft->pending_add_handle + 1); + + hci_dev_unlock(hdev); + + if (!msft->reregistering) + hci_add_adv_patterns_monitor_complete(hdev, status); +} + +static void msft_le_cancel_monitor_advertisement_cb(struct hci_dev *hdev, + u8 status, u16 opcode, + struct sk_buff *skb) +{ + struct msft_cp_le_cancel_monitor_advertisement *cp; + struct msft_rp_le_cancel_monitor_advertisement *rp; + struct adv_monitor *monitor; + struct msft_monitor_advertisement_handle_data *handle_data; + struct msft_data *msft = hdev->msft_data; + int err; + bool pending; + + if (status) + goto done; + + rp = (struct msft_rp_le_cancel_monitor_advertisement *)skb->data; + if (skb->len < sizeof(*rp)) { + status = HCI_ERROR_UNSPECIFIED; + goto done; + } + + hci_dev_lock(hdev); + + cp = hci_sent_cmd_data(hdev, hdev->msft_opcode); + handle_data = msft_find_handle_data(hdev, cp->handle, false); + + if (handle_data) { + monitor = idr_find(&hdev->adv_monitors_idr, + handle_data->mgmt_handle); + if (monitor) + hci_free_adv_monitor(hdev, monitor); + + list_del(&handle_data->list); + kfree(handle_data); + } + + /* If remove all monitors is required, we need to continue the process + * here because the earlier it was paused when waiting for the + * response from controller. 
+ */ + if (msft->pending_remove_handle == 0) { + pending = hci_remove_all_adv_monitor(hdev, &err); + if (pending) { + hci_dev_unlock(hdev); + return; + } + + if (err) + status = HCI_ERROR_UNSPECIFIED; + } + + hci_dev_unlock(hdev); + +done: + hci_remove_adv_monitor_complete(hdev, status); +} + +static void msft_le_set_advertisement_filter_enable_cb(struct hci_dev *hdev, + u8 status, u16 opcode, + struct sk_buff *skb) +{ + struct msft_cp_le_set_advertisement_filter_enable *cp; + struct msft_rp_le_set_advertisement_filter_enable *rp; + struct msft_data *msft = hdev->msft_data; + + rp = (struct msft_rp_le_set_advertisement_filter_enable *)skb->data; + if (skb->len < sizeof(*rp)) + return; + + /* Error 0x0C would be returned if the filter enabled status is + * already set to whatever we were trying to set. + * Although the default state should be disabled, some controller set + * the initial value to enabled. Because there is no way to know the + * actual initial value before sending this command, here we also treat + * error 0x0C as success. + */ + if (status != 0x00 && status != 0x0C) + return; + + hci_dev_lock(hdev); + + cp = hci_sent_cmd_data(hdev, hdev->msft_opcode); + msft->filter_enabled = cp->enable; + + if (status == 0x0C) + bt_dev_warn(hdev, "MSFT filter_enable is already %s", + cp->enable ? "on" : "off"); + + hci_dev_unlock(hdev); +} + +static bool msft_monitor_rssi_valid(struct adv_monitor *monitor) +{ + struct adv_rssi_thresholds *r = &monitor->rssi; + + if (r->high_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || + r->high_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX || + r->low_threshold < MSFT_RSSI_THRESHOLD_VALUE_MIN || + r->low_threshold > MSFT_RSSI_THRESHOLD_VALUE_MAX) + return false; + + /* High_threshold_timeout is not supported, + * once high_threshold is reached, events are immediately reported. 
+ */ + if (r->high_threshold_timeout != 0) + return false; + + if (r->low_threshold_timeout > MSFT_RSSI_LOW_TIMEOUT_MAX) + return false; + + /* Sampling period from 0x00 to 0xFF are all allowed */ + return true; +} + +static bool msft_monitor_pattern_valid(struct adv_monitor *monitor) +{ + return msft_monitor_rssi_valid(monitor); + /* No additional check needed for pattern-based monitor */ +} + +/* This function requires the caller holds hdev->lock */ +static int __msft_add_monitor_pattern(struct hci_dev *hdev, + struct adv_monitor *monitor) +{ + struct msft_cp_le_monitor_advertisement *cp; + struct msft_le_monitor_advertisement_pattern_data *pattern_data; + struct msft_le_monitor_advertisement_pattern *pattern; + struct adv_pattern *entry; + struct hci_request req; + struct msft_data *msft = hdev->msft_data; + size_t total_size = sizeof(*cp) + sizeof(*pattern_data); + ptrdiff_t offset = 0; + u8 pattern_count = 0; + int err = 0; + + if (!msft_monitor_pattern_valid(monitor)) + return -EINVAL; + + list_for_each_entry(entry, &monitor->patterns, list) { + pattern_count++; + total_size += sizeof(*pattern) + entry->length; + } + + cp = kmalloc(total_size, GFP_KERNEL); + if (!cp) + return -ENOMEM; + + cp->sub_opcode = MSFT_OP_LE_MONITOR_ADVERTISEMENT; + cp->rssi_high = monitor->rssi.high_threshold; + cp->rssi_low = monitor->rssi.low_threshold; + cp->rssi_low_interval = (u8)monitor->rssi.low_threshold_timeout; + cp->rssi_sampling_period = monitor->rssi.sampling_period; + + cp->cond_type = MSFT_MONITOR_ADVERTISEMENT_TYPE_PATTERN; + + pattern_data = (void *)cp->data; + pattern_data->count = pattern_count; + + list_for_each_entry(entry, &monitor->patterns, list) { + pattern = (void *)(pattern_data->data + offset); + /* the length also includes data_type and offset */ + pattern->length = entry->length + 2; + pattern->data_type = entry->ad_type; + pattern->start_byte = entry->offset; + memcpy(pattern->pattern, entry->value, entry->length); + offset += sizeof(*pattern) + 
entry->length; + } + + hci_req_init(&req, hdev); + hci_req_add(&req, hdev->msft_opcode, total_size, cp); + err = hci_req_run_skb(&req, msft_le_monitor_advertisement_cb); + kfree(cp); + + if (!err) + msft->pending_add_handle = monitor->handle; + + return err; +} + +/* This function requires the caller holds hdev->lock */ +int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor) +{ + struct msft_data *msft = hdev->msft_data; + + if (!msft) + return -EOPNOTSUPP; + + if (msft->reregistering) + return -EBUSY; + + return __msft_add_monitor_pattern(hdev, monitor); +} + +/* This function requires the caller holds hdev->lock */ +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + u16 handle) +{ + struct msft_cp_le_cancel_monitor_advertisement cp; + struct msft_monitor_advertisement_handle_data *handle_data; + struct hci_request req; + struct msft_data *msft = hdev->msft_data; + int err = 0; + + if (!msft) + return -EOPNOTSUPP; + + if (msft->reregistering) + return -EBUSY; + + handle_data = msft_find_handle_data(hdev, monitor->handle, true); + + /* If no matched handle, just remove without telling controller */ + if (!handle_data) + return -ENOENT; + + cp.sub_opcode = MSFT_OP_LE_CANCEL_MONITOR_ADVERTISEMENT; + cp.handle = handle_data->msft_handle; + + hci_req_init(&req, hdev); + hci_req_add(&req, hdev->msft_opcode, sizeof(cp), &cp); + err = hci_req_run_skb(&req, msft_le_cancel_monitor_advertisement_cb); + + if (!err) + msft->pending_remove_handle = handle; + + return err; +} + +void msft_req_add_set_filter_enable(struct hci_request *req, bool enable) +{ + struct hci_dev *hdev = req->hdev; + struct msft_cp_le_set_advertisement_filter_enable cp; + + cp.sub_opcode = MSFT_OP_LE_SET_ADVERTISEMENT_FILTER_ENABLE; + cp.enable = enable; + + hci_req_add(req, hdev->msft_opcode, sizeof(cp), &cp); +} + +int msft_set_filter_enable(struct hci_dev *hdev, bool enable) +{ + struct hci_request req; + struct msft_data *msft = hdev->msft_data; + 
int err; + + if (!msft) + return -EOPNOTSUPP; + + hci_req_init(&req, hdev); + msft_req_add_set_filter_enable(&req, enable); + err = hci_req_run_skb(&req, msft_le_set_advertisement_filter_enable_cb); + + return err; } diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h index e9c478e890b8..88ed613dfa08 100644 --- a/net/bluetooth/msft.h +++ b/net/bluetooth/msft.h @@ -12,16 +12,46 @@ #if IS_ENABLED(CONFIG_BT_MSFTEXT) +bool msft_monitor_supported(struct hci_dev *hdev); void msft_do_open(struct hci_dev *hdev); void msft_do_close(struct hci_dev *hdev); void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb); __u64 msft_get_features(struct hci_dev *hdev); +int msft_add_monitor_pattern(struct hci_dev *hdev, struct adv_monitor *monitor); +int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor, + u16 handle); +void msft_req_add_set_filter_enable(struct hci_request *req, bool enable); +int msft_set_filter_enable(struct hci_dev *hdev, bool enable); #else +static inline bool msft_monitor_supported(struct hci_dev *hdev) +{ + return false; +} + static inline void msft_do_open(struct hci_dev *hdev) {} static inline void msft_do_close(struct hci_dev *hdev) {} static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {} static inline __u64 msft_get_features(struct hci_dev *hdev) { return 0; } +static inline int msft_add_monitor_pattern(struct hci_dev *hdev, + struct adv_monitor *monitor) +{ + return -EOPNOTSUPP; +} + +static inline int msft_remove_monitor(struct hci_dev *hdev, + struct adv_monitor *monitor, + u16 handle) +{ + return -EOPNOTSUPP; +} + +static inline void msft_req_add_set_filter_enable(struct hci_request *req, + bool enable) {} +static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable) +{ + return -EOPNOTSUPP; +} #endif diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c659c464f7ca..b0c1ee110eff 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -25,7 +25,6 @@ #include 
<linux/crypto.h> #include <crypto/aes.h> #include <crypto/algapi.h> -#include <crypto/b128ops.h> #include <crypto/hash.h> #include <crypto/kpp.h> @@ -425,7 +424,7 @@ static int smp_c1(const u8 k[16], SMP_DBG("p1 %16phN", p1); /* res = r XOR p1 */ - u128_xor((u128 *) res, (u128 *) r, (u128 *) p1); + crypto_xor_cpy(res, r, p1, sizeof(p1)); /* res = e(k, res) */ err = smp_e(k, res); @@ -442,7 +441,7 @@ static int smp_c1(const u8 k[16], SMP_DBG("p2 %16phN", p2); /* res = res XOR p2 */ - u128_xor((u128 *) res, (u128 *) res, (u128 *) p2); + crypto_xor(res, p2, sizeof(p2)); /* res = e(k, res) */ err = smp_e(k, res); diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index fc0a98874bfc..01c67ed727a9 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -557,19 +557,22 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance) int br_mrp_set_port_state(struct net_bridge_port *p, enum br_mrp_port_state_type state) { + u32 port_state; + if (!p || !(p->flags & BR_MRP_AWARE)) return -EINVAL; spin_lock_bh(&p->br->lock); if (state == BR_MRP_PORT_STATE_FORWARDING) - p->state = BR_STATE_FORWARDING; + port_state = BR_STATE_FORWARDING; else - p->state = BR_STATE_BLOCKING; + port_state = BR_STATE_BLOCKING; + p->state = port_state; spin_unlock_bh(&p->br->lock); - br_mrp_port_switchdev_set_state(p, state); + br_mrp_port_switchdev_set_state(p, port_state); return 0; } diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c index ed547e03ace1..75a7e8d0a268 100644 --- a/net/bridge/br_mrp_switchdev.c +++ b/net/bridge/br_mrp_switchdev.c @@ -169,13 +169,12 @@ int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, return err; } -int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, - enum br_mrp_port_state_type state) +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state) { struct switchdev_attr attr = { .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE, - .u.mrp_port_state = state, + 
.id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .u.stp_state = state, }; int err; diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 32a48e5418da..2514954c1431 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -72,8 +72,7 @@ int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp, int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp, u32 interval, u8 max_miss, u32 period, bool monitor); -int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, - enum br_mrp_port_state_type state); +int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state); int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, enum br_mrp_port_role_type role); int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 96ff63cde1be..5aea9427ffe1 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -59,9 +59,8 @@ static BRPORT_ATTR(_name, 0644, \ static int store_flag(struct net_bridge_port *p, unsigned long v, unsigned long mask) { - unsigned long flags; - - flags = p->flags; + unsigned long flags = p->flags; + int err; if (v) flags |= mask; @@ -69,6 +68,10 @@ static int store_flag(struct net_bridge_port *p, unsigned long v, flags &= ~mask; if (flags != p->flags) { + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + p->flags = flags; br_port_flags_change(p, mask); } diff --git a/net/core/datagram.c b/net/core/datagram.c index 81809fa735a7..15ab9ffb27fe 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -721,8 +721,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, __wsum *csump) { - return __skb_datagram_iter(skb, offset, to, len, true, - csum_and_copy_to_iter, csump); + struct csum_state csdata = { .csum = *csump }; + int ret; + + ret = __skb_datagram_iter(skb, offset, to, 
len, true, + csum_and_copy_to_iter, &csdata); + if (ret) + return ret; + + *csump = csdata.csum; + return 0; } /** diff --git a/net/core/dev.c b/net/core/dev.c index 21d74d30f5d7..ce6291bc2e16 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -91,6 +91,7 @@ #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/kthread.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <net/net_namespace.h> @@ -1494,6 +1495,27 @@ void netdev_notify_peers(struct net_device *dev) } EXPORT_SYMBOL(netdev_notify_peers); +static int napi_threaded_poll(void *data); + +static int napi_kthread_create(struct napi_struct *n) +{ + int err = 0; + + /* Create and wake up the kthread once to put it in + * TASK_INTERRUPTIBLE mode to avoid the blocked task + * warning and work with loadavg. + */ + n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", + n->dev->name, n->napi_id); + if (IS_ERR(n->thread)) { + err = PTR_ERR(n->thread); + pr_err("kthread_run failed with err %d\n", err); + n->thread = NULL; + } + + return err; +} + static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; @@ -4265,6 +4287,22 @@ int gro_normal_batch __read_mostly = 8; static inline void ____napi_schedule(struct softnet_data *sd, struct napi_struct *napi) { + struct task_struct *thread; + + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { + /* Paired with smp_mb__before_atomic() in + * napi_enable()/dev_set_threaded(). + * Use READ_ONCE() to guarantee a complete + * read on napi->thread. Only call + * wake_up_process() when it's not NULL. + */ + thread = READ_ONCE(napi->thread); + if (thread) { + wake_up_process(thread); + return; + } + } + list_add_tail(&napi->poll_list, &sd->poll_list); __raise_softirq_irqoff(NET_RX_SOFTIRQ); } @@ -5745,10 +5783,11 @@ static void gro_normal_list(struct napi_struct *napi) /* Queue one GRO_NORMAL SKB up for list processing. 
If batch size exceeded, * pass the whole batch up to the stack. */ -static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) { list_add_tail(&skb->list, &napi->rx_list); - if (++napi->rx_count >= gro_normal_batch) + napi->rx_count += segs; + if (napi->rx_count >= gro_normal_batch) gro_normal_list(napi); } @@ -5785,7 +5824,7 @@ static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) } out: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); return NET_RX_SUCCESS; } @@ -6071,7 +6110,7 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi, { switch (ret) { case GRO_NORMAL: - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_MERGED_FREE: @@ -6155,7 +6194,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_NORMAL) - gro_normal_one(napi, skb); + gro_normal_one(napi, skb, 1); break; case GRO_MERGED_FREE: @@ -6701,6 +6740,49 @@ static void init_gro_hash(struct napi_struct *napi) napi->gro_bitmask = 0; } +int dev_set_threaded(struct net_device *dev, bool threaded) +{ + struct napi_struct *napi; + int err = 0; + + if (dev->threaded == threaded) + return 0; + + if (threaded) { + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!napi->thread) { + err = napi_kthread_create(napi); + if (err) { + threaded = false; + break; + } + } + } + } + + dev->threaded = threaded; + + /* Make sure kthread is created before THREADED bit + * is set. + */ + smp_mb__before_atomic(); + + /* Setting/unsetting threaded mode on a napi might not immediately + * take effect, if the current napi instance is actively being + * polled. In this case, the switch between threaded mode and + * softirq mode will happen in the next round of napi_schedule(). 
+ * This should not cause hiccups/stalls to the live traffic. + */ + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (threaded) + set_bit(NAPI_STATE_THREADED, &napi->state); + else + clear_bit(NAPI_STATE_THREADED, &napi->state); + } + + return err; +} + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6728,6 +6810,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, set_bit(NAPI_STATE_NPSVC, &napi->state); list_add_rcu(&napi->dev_list, &dev->napi_list); napi_hash_add(napi); + /* Create kthread for this napi if dev->threaded is set. + * Clear dev->threaded if kthread creation failed so that + * threaded mode will not be enabled in napi_enable(). + */ + if (dev->threaded && napi_kthread_create(napi)) + dev->threaded = 0; } EXPORT_SYMBOL(netif_napi_add); @@ -6745,9 +6833,28 @@ void napi_disable(struct napi_struct *n) clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); clear_bit(NAPI_STATE_DISABLE, &n->state); + clear_bit(NAPI_STATE_THREADED, &n->state); } EXPORT_SYMBOL(napi_disable); +/** + * napi_enable - enable NAPI scheduling + * @n: NAPI context + * + * Resume NAPI from being scheduled on this context. + * Must be paired with napi_disable. 
+ */ +void napi_enable(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + smp_mb__before_atomic(); + clear_bit(NAPI_STATE_SCHED, &n->state); + clear_bit(NAPI_STATE_NPSVC, &n->state); + if (n->dev->threaded && n->thread) + set_bit(NAPI_STATE_THREADED, &n->state); +} +EXPORT_SYMBOL(napi_enable); + static void flush_gro_hash(struct napi_struct *napi) { int i; @@ -6773,18 +6880,18 @@ void __netif_napi_del(struct napi_struct *napi) flush_gro_hash(napi); napi->gro_bitmask = 0; + + if (napi->thread) { + kthread_stop(napi->thread); + napi->thread = NULL; + } } EXPORT_SYMBOL(__netif_napi_del); -static int napi_poll(struct napi_struct *n, struct list_head *repoll) +static int __napi_poll(struct napi_struct *n, bool *repoll) { - void *have; int work, weight; - list_del_init(&n->poll_list); - - have = netpoll_poll_lock(n); - weight = n->weight; /* This NAPI_STATE_SCHED test is for avoiding a race @@ -6804,7 +6911,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) n->poll, work, weight); if (likely(work < weight)) - goto out_unlock; + return work; /* Drivers must not modify the NAPI state if they * consume the entire weight. In such cases this code @@ -6813,7 +6920,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) */ if (unlikely(napi_disable_pending(n))) { napi_complete(n); - goto out_unlock; + return work; } /* The NAPI context has more processing work, but busy-polling @@ -6826,7 +6933,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) */ napi_schedule(n); } - goto out_unlock; + return work; } if (n->gro_bitmask) { @@ -6844,17 +6951,79 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) if (unlikely(!list_empty(&n->poll_list))) { pr_warn_once("%s: Budget exhausted after napi rescheduled\n", n->dev ? 
n->dev->name : "backlog"); - goto out_unlock; + return work; } - list_add_tail(&n->poll_list, repoll); + *repoll = true; + + return work; +} + +static int napi_poll(struct napi_struct *n, struct list_head *repoll) +{ + bool do_repoll = false; + void *have; + int work; + + list_del_init(&n->poll_list); + + have = netpoll_poll_lock(n); + + work = __napi_poll(n, &do_repoll); + + if (do_repoll) + list_add_tail(&n->poll_list, repoll); -out_unlock: netpoll_poll_unlock(have); return work; } +static int napi_thread_wait(struct napi_struct *napi) +{ + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop() && !napi_disable_pending(napi)) { + if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + WARN_ON(!list_empty(&napi->poll_list)); + __set_current_state(TASK_RUNNING); + return 0; + } + + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); + return -1; +} + +static int napi_threaded_poll(void *data) +{ + struct napi_struct *napi = data; + void *have; + + while (!napi_thread_wait(napi)) { + for (;;) { + bool repoll = false; + + local_bh_disable(); + + have = netpoll_poll_lock(napi); + __napi_poll(napi, &repoll); + netpoll_poll_unlock(have); + + __kfree_skb_flush(); + local_bh_enable(); + + if (!repoll) + break; + + cond_resched(); + } + } + return 0; +} + static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -8768,6 +8937,48 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, } EXPORT_SYMBOL(dev_set_mac_address); +static DECLARE_RWSEM(dev_addr_sem); + +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + down_write(&dev_addr_sem); + ret = dev_set_mac_address(dev, sa, extack); + up_write(&dev_addr_sem); + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address_user); + +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) +{ + size_t 
size = sizeof(sa->sa_data); + struct net_device *dev; + int ret = 0; + + down_read(&dev_addr_sem); + rcu_read_lock(); + + dev = dev_get_by_name_rcu(net, dev_name); + if (!dev) { + ret = -ENODEV; + goto unlock; + } + if (!dev->addr_len) + memset(sa->sa_data, 0, size); + else + memcpy(sa->sa_data, dev->dev_addr, + min_t(size_t, size, dev->addr_len)); + sa->sa_family = dev->type; + +unlock: + rcu_read_unlock(); + up_read(&dev_addr_sem); + return ret; +} +EXPORT_SYMBOL(dev_get_mac_address); + /** * dev_change_carrier - Change device carrier * @dev: device diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index db8a0ff86f36..478d032f34ac 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm ifr->ifr_mtu = dev->mtu; return 0; - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, - sizeof(ifr->ifr_hwaddr.sa_data)); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof(ifr->ifr_hwaddr.sa_data), - (size_t)dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - case SIOCGIFSLAVE: err = -EINVAL; break; @@ -274,7 +263,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) @@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c */ switch (cmd) { + case SIOCGIFHWADDR: + dev_load(net, ifr->ifr_name); + ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name); + if (colon) + *colon = ':'; + return ret; /* * These ioctl calls: * - can be done by all. 
@@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: - case SIOCGIFHWADDR: case SIOCGIFSLAVE: case SIOCGIFMAP: case SIOCGIFINDEX: diff --git a/net/core/filter.c b/net/core/filter.c index 9ab94e90d660..3b728ab79a61 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4645,11 +4645,9 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { static u64 __bpf_get_netns_cookie(struct sock *sk) { -#ifdef CONFIG_NET_NS - return __net_gen_cookie(sk ? sk->sk_net.net : &init_net); -#else - return 0; -#endif + const struct net *net = sk ? sock_net(sk) : &init_net; + + return net->net_cookie; } BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) @@ -8816,7 +8814,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, target_size)); break; case offsetof(struct bpf_sock, rx_queue_mapping): -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping), si->dst_reg, si->src_reg, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index daf502c13d6d..307628fdf380 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -538,6 +538,45 @@ static ssize_t phys_switch_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_switch_id); +static ssize_t threaded_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + ssize_t ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (dev_isalive(netdev)) + ret = sprintf(buf, fmt_dec, netdev->threaded); + + rtnl_unlock(); + return ret; +} + +static int modify_napi_threaded(struct net_device *dev, unsigned long val) +{ + int ret; + + if (list_empty(&dev->napi_list)) + return -EOPNOTSUPP; + + if (val != 0 && val != 1) + return -EOPNOTSUPP; + + ret = dev_set_threaded(dev, val); + + return ret; +} + +static ssize_t threaded_store(struct device 
*dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, modify_napi_threaded); +} +static DEVICE_ATTR_RW(threaded); + static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, @@ -570,6 +609,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_proto_down.attr, &dev_attr_carrier_up_count.attr, &dev_attr_carrier_down_count.attr, + &dev_attr_threaded.attr, NULL, }; ATTRIBUTE_GROUPS(net_class); @@ -1136,18 +1176,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; + int num_tc, tc; int index; - int tc; if (!netif_is_multiqueue(dev)) return -ENOENT; + if (!rtnl_trylock()) + return restart_syscall(); + index = get_netdev_queue_index(queue); /* If queue belongs to subordinate dev use its TC mapping */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; + num_tc = dev->num_tc; tc = netdev_txq_to_tc(dev, index); + + rtnl_unlock(); + if (tc < 0) return -EINVAL; @@ -1158,8 +1205,8 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, * belongs to the root device it will be reported with just the * traffic class, so just "0" for TC 0 for example. */ - return dev->num_tc < 0 ? sprintf(buf, "%d%d\n", tc, dev->num_tc) : - sprintf(buf, "%d\n", tc); + return num_tc < 0 ? 
sprintf(buf, "%d%d\n", tc, num_tc) : + sprintf(buf, "%d\n", tc); } #ifdef CONFIG_XPS diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2ef3b4557f40..43b6ac4c4439 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -72,18 +72,6 @@ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; DEFINE_COOKIE(net_cookie); -u64 __net_gen_cookie(struct net *net) -{ - while (1) { - u64 res = atomic64_read(&net->net_cookie); - - if (res) - return res; - res = gen_cookie_next(&net_cookie); - atomic64_cmpxchg(&net->net_cookie, 0, res); - } -} - static struct net_generic *net_alloc_generic(void) { struct net_generic *ng; @@ -332,6 +320,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); + preempt_disable(); + net->net_cookie = gen_cookie_next(&net_cookie); + preempt_enable(); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); @@ -1103,10 +1094,6 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); - preempt_disable(); - __net_gen_cookie(&init_net); - preempt_enable(); - down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c313aaf2bce1..0edc0b2baaa4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2660,7 +2660,7 @@ static int do_setlink(const struct sk_buff *skb, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev_set_mac_address(dev, sa, extack); + err = dev_set_mac_address_user(dev, sa, extack); kfree(sa); if (err) goto errout; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 96249c4ad5f2..4d4956ed303b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -724,20 +724,23 @@ static int dsa_switch_setup(struct dsa_switch *ds) ds->slave_mii_bus = 
devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) { err = -ENOMEM; - goto unregister_notifier; + goto teardown; } dsa_slave_mii_bus_init(ds); err = mdiobus_register(ds->slave_mii_bus); if (err < 0) - goto unregister_notifier; + goto teardown; } ds->setup = true; return 0; +teardown: + if (ds->ops->teardown) + ds->ops->teardown(ds); unregister_notifier: dsa_switch_unregister_notifier(ds); unregister_devlink_ports: diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 8a1bcb2b4208..7060f128386b 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -20,6 +20,8 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_HSR_JOIN, + DSA_NOTIFIER_HSR_LEAVE, DSA_NOTIFIER_LAG_CHANGE, DSA_NOTIFIER_LAG_JOIN, DSA_NOTIFIER_LAG_LEAVE, @@ -100,6 +102,13 @@ struct dsa_switchdev_event_work { u16 vid; }; +/* DSA_NOTIFIER_HSR_* */ +struct dsa_notifier_hsr_info { + struct net_device *hsr; + int sw_index; + int port; +}; + struct dsa_slave_priv { /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, @@ -183,6 +192,8 @@ int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); +void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, diff --git a/net/dsa/port.c b/net/dsa/port.c index 5e079a61528e..b93bda463026 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -868,3 +868,37 @@ int dsa_port_get_phy_sset_count(struct dsa_port *dp) return ret; } EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count); + +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) +{ + struct dsa_notifier_hsr_info info = { + .sw_index = dp->ds->index, + 
.port = dp->index, + .hsr = hsr, + }; + int err; + + dp->hsr_dev = hsr; + + err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info); + if (err) + dp->hsr_dev = NULL; + + return err; +} + +void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr) +{ + struct dsa_notifier_hsr_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .hsr = hsr, + }; + int err; + + dp->hsr_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n"); +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 431bdbdd8473..a95e78d59740 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -17,6 +17,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> #include <linux/if_bridge.h> +#include <linux/if_hsr.h> #include <linux/netpoll.h> #include <linux/ptp_classify.h> @@ -1938,6 +1939,19 @@ static int dsa_slave_changeupper(struct net_device *dev, dsa_port_lag_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (is_hsr_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_hsr_join(dp, info->upper_dev); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_hsr_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } } return err; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 5026e4143663..1906179e59f7 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -166,6 +166,24 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } +static int dsa_switch_hsr_join(struct dsa_switch *ds, + struct dsa_notifier_hsr_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_hsr_join) + return ds->ops->port_hsr_join(ds, info->port, info->hsr); + + return -EOPNOTSUPP; +} + +static int dsa_switch_hsr_leave(struct dsa_switch *ds, + struct dsa_notifier_hsr_info *info) +{ + if (ds->index == 
info->sw_index && ds->ops->port_hsr_leave) + return ds->ops->port_hsr_leave(ds, info->port, info->hsr); + + return -EOPNOTSUPP; +} + static int dsa_switch_lag_change(struct dsa_switch *ds, struct dsa_notifier_lag_info *info) { @@ -371,6 +389,12 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_HSR_JOIN: + err = dsa_switch_hsr_join(ds, info); + break; + case DSA_NOTIFIER_HSR_LEAVE: + err = dsa_switch_hsr_leave(ds, info); + break; case DSA_NOTIFIER_LAG_CHANGE: err = dsa_switch_lag_change(ds, info); break; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index db0ed1a5fcb7..858cdf9d2913 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -11,12 +11,17 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *partner, *dp = dsa_slave_to_port(dev); u8 *trailer; trailer = skb_put(skb, 1); trailer[0] = BIT(dp->index); + if (dp->hsr_dev) + dsa_hsr_foreach_port(partner, dp->ds, dp->hsr_dev) + if (partner != dp) + trailer[0] |= BIT(partner->index); + return skb; } diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 835b9bba3e7e..c6a383dfd6c2 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -69,6 +69,10 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list", [NETIF_F_HW_MACSEC_BIT] = "macsec-hw-offload", [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding", + [NETIF_F_HW_HSR_TAG_INS_BIT] = "hsr-tag-ins-offload", + [NETIF_F_HW_HSR_TAG_RM_BIT] = "hsr-tag-rm-offload", + [NETIF_F_HW_HSR_FWD_BIT] = "hsr-fwd-offload", + [NETIF_F_HW_HSR_DUP_BIT] = "hsr-dup-offload", }; const char diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index ab953a1a0d6c..7444ec6e298e 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -230,7 +230,7 @@ static const struct header_ops 
hsr_header_ops = { .parse = eth_header_parse, }; -static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto) +static struct sk_buff *hsr_init_skb(struct hsr_port *master) { struct hsr_priv *hsr = master->hsr; struct sk_buff *skb; @@ -242,8 +242,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto) * being, for PRP it is a trailer and for HSR it is a * header */ - skb = dev_alloc_skb(sizeof(struct hsr_tag) + - sizeof(struct hsr_sup_tag) + + skb = dev_alloc_skb(sizeof(struct hsr_sup_tag) + sizeof(struct hsr_sup_payload) + hlen + tlen); if (!skb) @@ -251,10 +250,9 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master, u16 proto) skb_reserve(skb, hlen); skb->dev = master->dev; - skb->protocol = htons(proto); skb->priority = TC_PRIO_CONTROL; - if (dev_hard_header(skb, skb->dev, proto, + if (dev_hard_header(skb, skb->dev, ETH_P_PRP, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) goto out; @@ -275,12 +273,10 @@ static void send_hsr_supervision_frame(struct hsr_port *master, { struct hsr_priv *hsr = master->hsr; __u8 type = HSR_TLV_LIFE_CHECK; - struct hsr_tag *hsr_tag = NULL; struct hsr_sup_payload *hsr_sp; struct hsr_sup_tag *hsr_stag; unsigned long irqflags; struct sk_buff *skb; - u16 proto; *interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); if (hsr->announce_count < 3 && hsr->prot_version == 0) { @@ -289,23 +285,12 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr->announce_count++; } - if (!hsr->prot_version) - proto = ETH_P_PRP; - else - proto = ETH_P_HSR; - - skb = hsr_init_skb(master, proto); + skb = hsr_init_skb(master); if (!skb) { WARN_ONCE(1, "HSR: Could not send supervision frame\n"); return; } - if (hsr->prot_version > 0) { - hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); - hsr_tag->encap_proto = htons(ETH_P_PRP); - set_hsr_tag_LSDU_size(hsr_tag, HSR_V1_SUP_LSDUSIZE); - } - hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 
0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); @@ -315,8 +300,6 @@ static void send_hsr_supervision_frame(struct hsr_port *master, if (hsr->prot_version > 0) { hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr); hsr->sup_sequence_nr++; - hsr_tag->sequence_nr = htons(hsr->sequence_nr); - hsr->sequence_nr++; } else { hsr_stag->sequence_nr = htons(hsr->sequence_nr); hsr->sequence_nr++; @@ -332,7 +315,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); - if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + if (skb_put_padto(skb, ETH_ZLEN)) return; hsr_forward_skb(skb, master); @@ -348,10 +331,8 @@ static void send_prp_supervision_frame(struct hsr_port *master, struct hsr_sup_tag *hsr_stag; unsigned long irqflags; struct sk_buff *skb; - struct prp_rct *rct; - u8 *tail; - skb = hsr_init_skb(master, ETH_P_PRP); + skb = hsr_init_skb(master); if (!skb) { WARN_ONCE(1, "PRP: Could not send supervision frame\n"); return; @@ -373,17 +354,11 @@ static void send_prp_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr); - if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) { + if (skb_put_padto(skb, ETH_ZLEN)) { spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); return; } - tail = skb_tail_pointer(skb) - HSR_HLEN; - rct = (struct prp_rct *)tail; - rct->PRP_suffix = htons(ETH_P_PRP); - set_prp_LSDU_size(rct, HSR_V1_SUP_LSDUSIZE); - rct->sequence_nr = htons(hsr->sequence_nr); - hsr->sequence_nr++; spin_unlock_irqrestore(&master->hsr->seqnr_lock, irqflags); hsr_forward_skb(skb, master); @@ -442,6 +417,7 @@ static struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, + .drop_frame = hsr_drop_frame, .fill_frame_info = 
hsr_fill_frame_info, .invalid_dan_ingress_frame = hsr_invalid_dan_ingress_frame, }; @@ -489,10 +465,11 @@ void hsr_dev_setup(struct net_device *dev) /* Return true if dev is a HSR master; return false otherwise. */ -inline bool is_hsr_master(struct net_device *dev) +bool is_hsr_master(struct net_device *dev) { return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit); } +EXPORT_SYMBOL(is_hsr_master); /* Default multicast address for HSR Supervision frames */ static const unsigned char def_multicast_addr[ETH_ALEN] __aligned(2) = { @@ -545,16 +522,6 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr->prot_version = protocol_version; - /* FIXME: should I modify the value of these? - * - * - hsr_dev->flags - i.e. - * IFF_MASTER/SLAVE? - * - hsr_dev->priv_flags - i.e. - * IFF_EBRIDGE? - * IFF_TX_SKB_SHARING? - * IFF_HSR_MASTER/SLAVE? - */ - /* Make sure the 1st call to netif_carrier_on() gets through */ netif_carrier_off(hsr_dev); diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index 868373822ee4..9060c92168f9 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -19,6 +19,5 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version, struct netlink_ext_ack *extack); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); -bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); #endif /* __HSR_DEVICE_H */ diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index cadfccd7876e..ed82a470b6e1 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -186,6 +186,7 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, set_prp_LSDU_size(trailer, lsdu_size); trailer->sequence_nr = htons(frame->sequence_nr); trailer->PRP_suffix = htons(ETH_P_PRP); + skb->protocol = eth_hdr(skb)->h_proto; return skb; } @@ -226,6 +227,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, 
hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); + skb->protocol = hsr_ethhdr->ethhdr.h_proto; return skb; } @@ -247,6 +249,8 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, /* set the lane id properly */ hsr_set_path_id(hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); + } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { + return skb_clone(frame->skb_std, GFP_ATOMIC); } /* Create the new skb with enough headroom to fit the HSR tag */ @@ -289,6 +293,8 @@ struct sk_buff *prp_create_tagged_frame(struct hsr_frame_info *frame, return NULL; } return skb_clone(frame->skb_prp, GFP_ATOMIC); + } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { + return skb_clone(frame->skb_std, GFP_ATOMIC); } skb = skb_copy_expand(frame->skb_std, 0, @@ -341,6 +347,14 @@ bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) port->type == HSR_PT_SLAVE_A)); } +bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port) +{ + if (port->dev->features & NETIF_F_HW_HSR_FWD) + return prp_drop_frame(frame, port); + + return false; +} + /* Forward the frame through all devices except: * - Back through the receiving device * - If it's a HSR frame: through a device where it has passed before @@ -357,6 +371,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame) { struct hsr_port *port; struct sk_buff *skb; + bool sent = false; hsr_for_each_port(frame->port_rcv->hsr, port) { struct hsr_priv *hsr = port->hsr; @@ -372,6 +387,12 @@ static void hsr_forward_do(struct hsr_frame_info *frame) if (port->type != HSR_PT_MASTER && frame->is_local_exclusive) continue; + /* If hardware duplicate generation is enabled, only send out + * one port. + */ + if ((port->dev->features & NETIF_F_HW_HSR_DUP) && sent) + continue; + /* Don't send frame over port where it has been sent before. * Also fro SAN, this shouldn't be done. 
*/ @@ -403,10 +424,12 @@ static void hsr_forward_do(struct hsr_frame_info *frame) } skb->dev = port->dev; - if (port->type == HSR_PT_MASTER) + if (port->type == HSR_PT_MASTER) { hsr_deliver_master(skb, port->dev, frame->node_src); - else - hsr_xmit(skb, port, frame); + } else { + if (!hsr_xmit(skb, port, frame)) + sent = true; + } } } @@ -454,7 +477,11 @@ static void handle_std_frame(struct sk_buff *skb, void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame) { - if (proto == htons(ETH_P_PRP) || + struct hsr_port *port = frame->port_rcv; + struct hsr_priv *hsr = port->hsr; + + /* HSRv0 supervisory frames double as a tag so treat them as tagged. */ + if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) || proto == htons(ETH_P_HSR)) { /* HSR tagged frame :- Data or Supervision */ frame->skb_std = NULL; diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h index 618140d484ad..b6acaafa83fc 100644 --- a/net/hsr/hsr_forward.h +++ b/net/hsr/hsr_forward.h @@ -23,6 +23,7 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame, struct hsr_port *port); bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port); +bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port); void prp_fill_frame_info(__be16 proto, struct sk_buff *skb, struct hsr_frame_info *frame); void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb, diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 5c97de459905..f9a8cc82ae2e 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -277,6 +277,8 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame) skb = frame->skb_hsr; else if (frame->skb_prp) skb = frame->skb_prp; + else if (frame->skb_std) + skb = frame->skb_std; if (!skb) return; diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 2fd1976e5b1c..f7e284f23b1f 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c 
@@ -131,6 +131,17 @@ struct hsr_port *hsr_port_get_hsr(struct hsr_priv *hsr, enum hsr_port_type pt) return NULL; } +int hsr_get_version(struct net_device *dev, enum hsr_version *ver) +{ + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + *ver = hsr->prot_version; + + return 0; +} +EXPORT_SYMBOL(hsr_get_version); + static struct notifier_block hsr_nb = { .notifier_call = hsr_netdev_notify, /* Slave event notifications */ }; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index a9c30a608e35..a169808ee78a 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -13,6 +13,7 @@ #include <linux/netdevice.h> #include <linux/list.h> #include <linux/if_vlan.h> +#include <linux/if_hsr.h> /* Time constants as specified in the HSR specification (IEC-62439-3 2010) * Table 8. @@ -171,13 +172,6 @@ struct hsr_port { enum hsr_port_type type; }; -/* used by driver internally to differentiate various protocols */ -enum hsr_version { - HSR_V0 = 0, - HSR_V1, - PRP_V1, -}; - struct hsr_frame_info; struct hsr_node; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 36d5fcf09c61..c5227d42faf5 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -48,12 +48,14 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) goto finish_consume; } - /* For HSR, only tagged frames are expected, but for PRP - * there could be non tagged frames as well from Single - * attached nodes (SANs). + /* For HSR, only tagged frames are expected (unless the device offloads + * HSR tag removal), but for PRP there could be non tagged frames as + * well from Single attached nodes (SANs). 
*/ protocol = eth_hdr(skb)->h_proto; - if (hsr->proto_ops->invalid_dan_ingress_frame && + + if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) && + hsr->proto_ops->invalid_dan_ingress_frame && hsr->proto_ops->invalid_dan_ingress_frame(protocol)) goto finish_pass; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 5bda5aeda579..601f5fbfc63f 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -285,7 +285,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.esph = ip_esp_hdr(skb); - if (!hw_offload || (hw_offload && !skb_is_gso(skb))) { + if (!hw_offload || !skb_is_gso(skb)) { esp.nfrags = esp_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index aff454ef0fa3..b58db1ca4bfb 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -18,7 +18,8 @@ struct fib_alias { s16 fa_default; u8 offload:1, trap:1, - unused:6; + offload_failed:1, + unused:5; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4c38facf91c0..a632b66bc13a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -521,6 +521,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; + fri.offload_failed = fa->offload_failed; err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ @@ -1811,6 +1812,8 @@ offload: rtm->rtm_flags |= RTM_F_OFFLOAD; if (fri->trap) rtm->rtm_flags |= RTM_F_TRAP; + if (fri->offload_failed) + rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 60559b708158..25cf387cca5b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1047,12 +1047,20 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) if (!fa_match) 
goto out; - if (fa_match->offload == fri->offload && fa_match->trap == fri->trap) + if (fa_match->offload == fri->offload && fa_match->trap == fri->trap && + fa_match->offload_failed == fri->offload_failed) goto out; fa_match->offload = fri->offload; fa_match->trap = fri->trap; + /* 2 means send notifications only if offload_failed was changed. */ + if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && + fa_match->offload_failed == fri->offload_failed) + goto out; + + fa_match->offload_failed = fri->offload_failed; + if (!net->ipv4.sysctl_fib_notify_on_flag_change) goto out; @@ -1290,6 +1298,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->fa_default = -1; new_fa->offload = 0; new_fa->trap = 0; + new_fa->offload_failed = 0; hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); @@ -1350,6 +1359,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->fa_default = -1; new_fa->offload = 0; new_fa->trap = 0; + new_fa->offload_failed = 0; /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); @@ -2289,6 +2299,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, fri.type = fa->fa_type; fri.offload = fa->offload; fri.trap = fa->trap; + fri.offload_failed = fa->offload_failed; err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 45fb450b4522..c96866a53a66 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -709,6 +709,17 @@ unlock: } EXPORT_SYMBOL_GPL(inet_unhash); +/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm + * Note that we use 32bit integers (vs RFC 'short integers') + * because 2^16 is not a multiple of num_ephemeral and this + * property might be used by clever attacker. 
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, + * we use 256 instead to really give more isolation and + * privacy, this only consumes 1 KB of kernel memory. + */ +#define INET_TABLE_PERTURB_SHIFT 8 +static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; + int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, @@ -722,8 +733,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; u32 remaining, offset; int ret, i, low, high; - static u32 hint; int l3mdev; + u32 index; if (port) { head = &hinfo->bhash[inet_bhashfn(net, port, @@ -750,7 +761,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - offset = (hint + port_offset) % remaining; + net_get_random_once(table_perturb, sizeof(table_perturb)); + index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + + offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining; /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ @@ -804,7 +818,12 @@ next_port: return -EADDRNOTAVAIL; ok: - hint += i + 2; + /* If our first attempt found a candidate, skip next candidate + * in 1/16 of cases to add some noise. 
+ */ + if (!i && !(prandom_u32() % 16)) + i = 2; + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f9ab1fb219ec..47db1bfdaaa0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -309,6 +309,7 @@ have_carrier: */ static void __init ic_close_devs(void) { + struct net_device *selected_dev = ic_dev->dev; struct ic_device *d, *next; struct net_device *dev; @@ -322,7 +323,7 @@ static void __init ic_close_devs(void) next = d->next; dev = d->dev; - netdev_for_each_lower_dev(ic_dev->dev, lower_dev, iter) { + netdev_for_each_lower_dev(selected_dev, lower_dev, iter) { if (dev == lower_dev) { bring_down = false; break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index be31e2446470..02d81d79deeb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3304,6 +3304,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fri.type = rt->rt_type; fri.offload = 0; fri.trap = 0; + fri.offload_failed = 0; if (res.fa_head) { struct fib_alias *fa; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e5798b3b59d2..f55095d3ed16 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1361,7 +1361,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = &two, }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e1a17c6b473c..9896ca10bb34 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2030,6 +2030,7 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, err); } +#define TCP_VALID_ZC_MSG_FLAGS (TCP_CMSG_TS) static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss); static void tcp_zc_finalize_rx_tstamp(struct sock *sk, @@ -4152,13 +4153,21 @@ static int 
do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; if (len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; - if (len > sizeof(zc)) { + if (unlikely(len > sizeof(zc))) { + err = check_zeroed_user(optval + sizeof(zc), + len - sizeof(zc)); + if (err < 1) + return err == 0 ? -EINVAL : err; len = sizeof(zc); if (put_user(len, optlen)) return -EFAULT; } if (copy_from_user(&zc, optval, len)) return -EFAULT; + if (zc.reserved) + return -EINVAL; + if (zc.msg_flags & ~(TCP_VALID_ZC_MSG_FLAGS)) + return -EINVAL; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc, &tss); release_sock(sk); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2b804fcebcc6..153ad103ba74 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -788,7 +788,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); int hdr_len = skb_network_header_len(skb); - if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + if (!xo || !(xo->flags & CRYPTO_DONE)) kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f43e27555725..ef9d022e693f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -499,7 +499,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb, hlist_for_each_entry_rcu(tb, head, tb6_hlist) { err = fib6_table_dump(net, tb, w); - if (err < 0) + if (err) goto out; } } @@ -507,7 +507,8 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb, out: kfree(w); - return err; + /* The tree traversal function should never return a positive value. */ + return err > 0 ? 
-EINVAL : err; } static int fib6_dump_node(struct fib6_walker *w) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0d1784b0d65d..1536f4948e86 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5619,6 +5619,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, rtm->rtm_flags |= RTM_F_OFFLOAD; if (rt->trap) rtm->rtm_flags |= RTM_F_TRAP; + if (rt->offload_failed) + rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED; } if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) @@ -6070,17 +6072,25 @@ errout: } void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, - bool offload, bool trap) + bool offload, bool trap, bool offload_failed) { struct sk_buff *skb; int err; - if (f6i->offload == offload && f6i->trap == trap) + if (f6i->offload == offload && f6i->trap == trap && + f6i->offload_failed == offload_failed) return; f6i->offload = offload; f6i->trap = trap; + /* 2 means send notifications only if offload_failed was changed. */ + if (net->ipv6.sysctl.fib_notify_on_flag_change == 2 && + f6i->offload_failed == offload_failed) + return; + + f6i->offload_failed = offload_failed; + if (!rcu_access_pointer(f6i->fib6_node)) /* The route was removed from the tree, do not send * notfication. 
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index b07f7c1c82a4..c2a0c78e84d4 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -31,6 +31,8 @@ #include <linux/etherdevice.h> #include <linux/bpf.h> +#define SEG6_F_ATTR(i) BIT(i) + struct seg6_local_lwt; /* callbacks used for customizing the creation and destruction of a behavior */ @@ -660,8 +662,8 @@ seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt) unsigned long parsed_optattrs = slwt->parsed_optattrs; bool legacy, vrfmode; - legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE)); - vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE)); + legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)); + vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)); if (!(legacy ^ vrfmode)) /* both are absent or present: invalid DT6 mode */ @@ -883,32 +885,32 @@ static struct seg6_action_desc seg6_action_table[] = { }, { .action = SEG6_LOCAL_ACTION_END_X, - .attrs = (1 << SEG6_LOCAL_NH6), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), .input = input_action_end_x, }, { .action = SEG6_LOCAL_ACTION_END_T, - .attrs = (1 << SEG6_LOCAL_TABLE), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), .input = input_action_end_t, }, { .action = SEG6_LOCAL_ACTION_END_DX2, - .attrs = (1 << SEG6_LOCAL_OIF), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF), .input = input_action_end_dx2, }, { .action = SEG6_LOCAL_ACTION_END_DX6, - .attrs = (1 << SEG6_LOCAL_NH6), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), .input = input_action_end_dx6, }, { .action = SEG6_LOCAL_ACTION_END_DX4, - .attrs = (1 << SEG6_LOCAL_NH4), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4), .input = input_action_end_dx4, }, { .action = SEG6_LOCAL_ACTION_END_DT4, - .attrs = (1 << SEG6_LOCAL_VRFTABLE), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), #ifdef CONFIG_NET_L3_MASTER_DEV .input = input_action_end_dt4, .slwt_ops = { @@ -920,30 +922,30 @@ static struct seg6_action_desc seg6_action_table[] = { .action = SEG6_LOCAL_ACTION_END_DT6, #ifdef 
CONFIG_NET_L3_MASTER_DEV .attrs = 0, - .optattrs = (1 << SEG6_LOCAL_TABLE) | - (1 << SEG6_LOCAL_VRFTABLE), + .optattrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE) | + SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE), .slwt_ops = { .build_state = seg6_end_dt6_build, }, #else - .attrs = (1 << SEG6_LOCAL_TABLE), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE), #endif .input = input_action_end_dt6, }, { .action = SEG6_LOCAL_ACTION_END_B6, - .attrs = (1 << SEG6_LOCAL_SRH), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .input = input_action_end_b6, }, { .action = SEG6_LOCAL_ACTION_END_B6_ENCAP, - .attrs = (1 << SEG6_LOCAL_SRH), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH), .input = input_action_end_b6_encap, .static_headroom = sizeof(struct ipv6hdr), }, { .action = SEG6_LOCAL_ACTION_END_BPF, - .attrs = (1 << SEG6_LOCAL_BPF), + .attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF), .input = input_action_end_bpf, }, @@ -1366,7 +1368,7 @@ static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed, * attribute; otherwise, we call the destroy() callback. 
*/ for (i = 0; i < max_parsed; ++i) { - if (!(parsed_attrs & (1 << i))) + if (!(parsed_attrs & SEG6_F_ATTR(i))) continue; param = &seg6_action_params[i]; @@ -1395,7 +1397,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs, int err, i; for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) { - if (!(desc->optattrs & (1 << i)) || !attrs[i]) + if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i]) continue; /* once here, the i-th attribute is provided by the @@ -1408,7 +1410,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs, goto parse_optattrs_err; /* current attribute has been correctly parsed */ - parsed_optattrs |= (1 << i); + parsed_optattrs |= SEG6_F_ATTR(i); } /* store in the tunnel state all the optional attributed successfully @@ -1494,7 +1496,7 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) /* parse the required attributes */ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (desc->attrs & (1 << i)) { + if (desc->attrs & SEG6_F_ATTR(i)) { if (!attrs[i]) return -EINVAL; @@ -1599,7 +1601,7 @@ static int seg6_local_fill_encap(struct sk_buff *skb, attrs = slwt->desc->attrs | slwt->parsed_optattrs; for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (attrs & (1 << i)) { + if (attrs & SEG6_F_ATTR(i)) { param = &seg6_action_params[i]; err = param->put(skb, slwt); if (err < 0) @@ -1620,30 +1622,30 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) attrs = slwt->desc->attrs | slwt->parsed_optattrs; - if (attrs & (1 << SEG6_LOCAL_SRH)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH)) nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); - if (attrs & (1 << SEG6_LOCAL_TABLE)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE)) nlsize += nla_total_size(4); - if (attrs & (1 << SEG6_LOCAL_NH4)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4)) nlsize += nla_total_size(4); - if (attrs & (1 << SEG6_LOCAL_NH6)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6)) nlsize += nla_total_size(16); - if (attrs & (1 << SEG6_LOCAL_IIF)) + if (attrs & 
SEG6_F_ATTR(SEG6_LOCAL_IIF)) nlsize += nla_total_size(4); - if (attrs & (1 << SEG6_LOCAL_OIF)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF)) nlsize += nla_total_size(4); - if (attrs & (1 << SEG6_LOCAL_BPF)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF)) nlsize += nla_total_size(sizeof(struct nlattr)) + nla_total_size(MAX_PROG_NAME) + nla_total_size(4); - if (attrs & (1 << SEG6_LOCAL_VRFTABLE)) + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE)) nlsize += nla_total_size(4); return nlsize; @@ -1670,7 +1672,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a, return 1; for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { - if (attrs_a & (1 << i)) { + if (attrs_a & SEG6_F_ATTR(i)) { param = &seg6_action_params[i]; if (param->cmp(slwt_a, slwt_b)) return 1; @@ -1692,6 +1694,15 @@ static const struct lwtunnel_encap_ops seg6_local_ops = { int __init seg6_local_init(void) { + /* If the max total number of defined attributes is reached, then your + * kernel build stops here. + * + * This check is required to avoid arithmetic overflows when processing + * behavior attributes and the maximum number of defined attributes + * exceeds the allowed value. 
+ */ + BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long)); + return lwtunnel_encap_add_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 392ef01e3366..263ab43ed06b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -167,7 +167,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, + .extra2 = &two, }, { } }; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index cd9a9bd242ba..51ec8256b7fa 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -69,7 +69,7 @@ config MAC80211_MESH config MAC80211_LEDS bool "Enable LED triggers" depends on MAC80211 - depends on LEDS_CLASS + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select LEDS_TRIGGERS help This option enables a few LED triggers for different diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 3b71d68b3863..bb874c5d663a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -867,7 +867,7 @@ fully_established: clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk, subflow); } else { - mptcp_pm_fully_established(msk); + mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); } return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 1a25003fd8e3..6fd4b2c1b076 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -68,13 +68,14 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id) /* path manager event handlers */ -void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side) +void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) { struct mptcp_pm_data *pm = &msk->pm; pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); WRITE_ONCE(pm->server_side, server_side); + mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); } bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) @@ -119,16 
+120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return true; } -void mptcp_pm_fully_established(struct mptcp_sock *msk) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { struct mptcp_pm_data *pm = &msk->pm; + bool announce = false; pr_debug("msk=%p", msk); - /* try to avoid acquiring the lock below */ - if (!READ_ONCE(pm->work_pending)) - return; - spin_lock_bh(&pm->lock); /* mptcp_pm_fully_established() can be invoked by multiple @@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk) if (READ_ONCE(pm->work_pending) && !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); + if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) + announce = true; + + msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); + + if (announce) + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); } void mptcp_pm_connection_closed(struct mptcp_sock *msk) @@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); + mptcp_event_addr_announced(msk, addr); + spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { @@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) pr_debug("msk=%p remote_id=%d", msk, rm_id); + mptcp_event_addr_removed(msk, rm_id); + spin_lock_bh(&pm->lock); mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); pm->rm_id = rm_id; @@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); subflow->backup = bkup; + + mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); } /* path manager helpers */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 23780a13b934..229fd1af2e29 100644 --- 
a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -56,6 +56,8 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 +static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); + static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) { @@ -448,17 +450,17 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } } -void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) +static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } -void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) +static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } -void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; @@ -498,7 +500,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_add_addr_send_ack(msk); } -void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -568,7 +570,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; @@ -592,7 +594,7 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_accepted--; @@ -605,6 +607,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) } } +void mptcp_pm_nl_work(struct mptcp_sock *msk) +{ + struct 
mptcp_pm_data *pm = &msk->pm; + + msk_owned_by_me(msk); + + spin_lock_bh(&msk->pm.lock); + + pr_debug("msk=%p status=%x", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); + mptcp_pm_nl_add_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_nl_add_addr_send_ack(msk); + } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_nl_rm_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); + mptcp_pm_nl_fully_established(msk); + } + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); + mptcp_pm_nl_subflow_established(msk); + } + + spin_unlock_bh(&msk->pm.lock); +} + void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) { struct mptcp_subflow_context *subflow, *tmp; @@ -629,7 +664,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.local_addr_used--; @@ -825,10 +860,14 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk) WRITE_ONCE(pm->accept_subflow, subflows); } -#define MPTCP_PM_CMD_GRP_OFFSET 0 +#define MPTCP_PM_CMD_GRP_OFFSET 0 +#define MPTCP_PM_EV_GRP_OFFSET 1 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, + [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, + .flags = GENL_UNS_ADMIN_PERM, + }, }; static const struct nla_policy @@ -1447,6 +1486,261 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return 0; } +static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) +{ + 
genlmsg_multicast_netns(&mptcp_genl_family, net, + nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); +} + +static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) +{ + const struct inet_sock *issk = inet_sk(ssk); + const struct mptcp_subflow_context *sf; + + if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) + return -EMSGSIZE; + + switch (ssk->sk_family) { + case AF_INET: + if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) + return -EMSGSIZE; + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) + return -EMSGSIZE; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *np = inet6_sk(ssk); + + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) + return -EMSGSIZE; + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) + return -EMSGSIZE; + break; + } +#endif + default: + WARN_ON_ONCE(1); + return -EMSGSIZE; + } + + if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) + return -EMSGSIZE; + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) + return -EMSGSIZE; + + sf = mptcp_subflow_ctx(ssk); + if (WARN_ON_ONCE(!sf)) + return -EINVAL; + + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + return -EMSGSIZE; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + const struct sock *sk = (const struct sock *)msk; + const struct mptcp_subflow_context *sf; + u8 sk_err; + + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + return -EMSGSIZE; + + if (mptcp_event_add_subflow(skb, ssk)) + return -EMSGSIZE; + + sf = mptcp_subflow_ctx(ssk); + if (WARN_ON_ONCE(!sf)) + return -EINVAL; + + if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) + return -EMSGSIZE; + + if (ssk->sk_bound_dev_if && + nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) + return -EMSGSIZE; + + sk_err = ssk->sk_err; + 
if (sk_err && sk->sk_state == TCP_ESTABLISHED && + nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_sub_established(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + return mptcp_event_put_token_and_ssk(skb, msk, ssk); +} + +static int mptcp_event_sub_closed(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_created(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); + + if (err) + return err; + + return mptcp_event_add_subflow(skb, ssk); +} + +void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); + if (!nlh) + goto nla_put_failure; + + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + goto nla_put_failure; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) + goto nla_put_failure; + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); + return; + +nla_put_failure: + kfree_skb(skb); +} + +void mptcp_event_addr_announced(const struct mptcp_sock *msk, + const struct mptcp_addr_info *info) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, + MPTCP_EVENT_ANNOUNCED); + if (!nlh) + goto nla_put_failure; + + 
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + goto nla_put_failure; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) + goto nla_put_failure; + + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) + goto nla_put_failure; + + switch (info->family) { + case AF_INET: + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) + goto nla_put_failure; + break; +#endif + default: + WARN_ON_ONCE(1); + goto nla_put_failure; + } + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); + return; + +nla_put_failure: + kfree_skb(skb); +} + +void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, + const struct sock *ssk, gfp_t gfp) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); + if (!nlh) + goto nla_put_failure; + + switch (type) { + case MPTCP_EVENT_UNSPEC: + WARN_ON_ONCE(1); + break; + case MPTCP_EVENT_CREATED: + case MPTCP_EVENT_ESTABLISHED: + if (mptcp_event_created(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_CLOSED: + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_ANNOUNCED: + case MPTCP_EVENT_REMOVED: + /* call mptcp_event_addr_announced()/removed instead */ + WARN_ON_ONCE(1); + break; + case MPTCP_EVENT_SUB_ESTABLISHED: + case MPTCP_EVENT_SUB_PRIORITY: + if (mptcp_event_sub_established(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_SUB_CLOSED: + if (mptcp_event_sub_closed(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + } + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, 
gfp); + return; + +nla_put_failure: + kfree_skb(skb); +} + static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b9f16a1535d2..c2a8392254dc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2114,8 +2114,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) * so we need to use tcp_close() after detaching them from the mptcp * parent socket. */ -void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow) +static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow) { list_del(&subflow->node); @@ -2147,40 +2147,17 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_put(ssk); } -static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) +void mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow) { - return 0; + if (sk->sk_state == TCP_ESTABLISHED) + mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); + __mptcp_close_ssk(sk, ssk, subflow); } -static void pm_work(struct mptcp_sock *msk) +static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) { - struct mptcp_pm_data *pm = &msk->pm; - - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x", msk, pm->status); - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); - mptcp_pm_nl_add_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_add_addr_send_ack(msk); - } - if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_nl_rm_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); - mptcp_pm_nl_fully_established(msk); - } - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 
- pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); - mptcp_pm_nl_subflow_established(msk); - } - - spin_unlock_bh(&msk->pm.lock); + return 0; } static void __mptcp_close_subflow(struct mptcp_sock *msk) @@ -2195,7 +2172,11 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (inet_sk_state_load(ssk) != TCP_CLOSE) continue; - __mptcp_close_ssk((struct sock *)msk, ssk, subflow); + /* 'subflow_data_ready' will re-sched once rx queue is empty */ + if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) + continue; + + mptcp_close_ssk((struct sock *)msk, ssk, subflow); } } @@ -2267,11 +2248,8 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_fastclose(msk); - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (msk->pm.status) - pm_work(msk); + mptcp_pm_nl_work(msk); if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); @@ -2291,6 +2269,9 @@ static void mptcp_worker(struct work_struct *work) goto unlock; } + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(msk); + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; @@ -2607,6 +2588,10 @@ cleanup: release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); + + if (mptcp_sk(sk)->token) + mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + sock_put(sk); } @@ -3049,7 +3034,7 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->can_ack, 1); WRITE_ONCE(msk->snd_una, msk->write_seq); - mptcp_pm_new_connection(msk, 0); + mptcp_pm_new_connection(msk, ssk, 0); mptcp_rcv_space_init(msk, ssk); } @@ -3078,7 +3063,7 @@ bool mptcp_finish_join(struct sock *ssk) return false; if (!msk->pm.server_side) - return true; + goto out; if (!mptcp_pm_allow_new_subflow(msk)) return false; @@ -3105,6 +3090,8 @@ bool mptcp_finish_join(struct sock *ssk) if (parent_sock && !ssk->sk_socket) mptcp_sock_graft(ssk, parent_sock); subflow->map_seq = READ_ONCE(msk->ack_seq); +out: + 
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; } @@ -3281,9 +3268,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_sock *msk = mptcp_sk(newsock->sk); struct mptcp_subflow_context *subflow; struct sock *newsk = newsock->sk; - bool slowpath; - slowpath = lock_sock_fast(newsk); + lock_sock(newsk); /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, @@ -3293,7 +3279,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, list_add(&subflow->node, &msk->conn_list); sock_hold(msk->first); if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk); + mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); @@ -3309,7 +3295,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - unlock_sock_fast(newsk, slowpath); + release_sock(newsk); } if (inet_csk_listen_poll(ssock->sk)) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 73a923d02aad..d31edbae8da8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -10,6 +10,7 @@ #include <linux/random.h> #include <net/tcp.h> #include <net/inet_connection_sock.h> +#include <uapi/linux/mptcp.h> #define MPTCP_SUPPORTED_VERSION 1 @@ -539,8 +540,8 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); -void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow); +void mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct 
socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); @@ -639,8 +640,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); -void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk); +void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk, @@ -666,6 +667,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); +void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, + const struct sock *ssk, gfp_t gfp); +void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); +void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); @@ -713,11 +719,7 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_data_init(struct mptcp_sock *msk); -void mptcp_pm_nl_fully_established(struct mptcp_sock *msk); -void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk); -void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk); -void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk); +void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); int 
mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 280da418d60b..ce2dea2a6e0a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -675,7 +675,7 @@ create_child: * created mptcp socket */ new_msk->sk_destruct = mptcp_sock_destruct; - mptcp_pm_new_connection(mptcp_sk(new_msk), 1); + mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); ctx->conn = new_msk; new_msk = NULL; @@ -953,6 +953,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, subflow->map_valid = 0; } +/* sched mptcp worker to remove the subflow if no more data is pending */ +static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) +{ + struct sock *sk = (struct sock *)msk; + + if (likely(ssk->sk_state != TCP_CLOSE)) + return; + + if (skb_queue_empty(&ssk->sk_receive_queue) && + !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) { + sock_hold(sk); + if (!schedule_work(&msk->work)) + sock_put(sk); + } +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -991,11 +1007,11 @@ static bool subflow_check_data_avail(struct sock *ssk) } if (status != MAPPING_OK) - return false; + goto no_data; skb = skb_peek(&ssk->sk_receive_queue); if (WARN_ON_ONCE(!skb)) - return false; + goto no_data; /* if msk lacks the remote key, this subflow must provide an * MP_CAPABLE-based mapping @@ -1029,6 +1045,9 @@ static bool subflow_check_data_avail(struct sock *ssk) } return true; +no_data: + subflow_sched_work_if_closed(msk, ssk); + return false; fatal: /* fatal protocol error, close the socket */ /* This barrier is coupled with smp_rmb() in tcp_poll() */ @@ -1413,6 +1432,8 @@ static void subflow_state_change(struct sock *sk) if (mptcp_subflow_data_available(sk)) 
mptcp_data_ready(parent, sk); + subflow_sched_work_if_closed(mptcp_sk(parent), sk); + if (__mptcp_check_fallback(mptcp_sk(parent)) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 234b7cab37c3..ff0168736f6e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1229,7 +1229,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * Let nf_ct_resolve_clash() deal with this later. */ if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) continue; NF_CT_STAT_INC_ATOMIC(net, found); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 55fca71ace26..5fa657b8e03d 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -399,7 +399,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, return -1; tcph = (void *)(skb_network_header(skb) + thoff); - inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true); + inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); return 0; } @@ -415,7 +415,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, - new_port, true); + new_port, false); if (!udph->check) udph->check = CSUM_MANGLED_0; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 01dc0a169a78..ab93a353651a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5287,6 +5287,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[NFT_SET_EXPR_MAX] = {}; struct 
nlattr *nla[NFTA_SET_ELEM_MAX + 1]; u8 genmask = nft_genmask_next(ctx->net); + u32 flags = 0, size = 0, num_exprs = 0; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; @@ -5296,7 +5297,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; - u32 flags = 0, size = 0; u64 timeout; u64 expiration; int err, i; @@ -5362,7 +5362,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_EXPR]) { struct nft_expr *expr; - if (set->num_exprs != 1) + if (set->num_exprs && set->num_exprs != 1) return -EOPNOTSUPP; expr = nft_set_elem_expr_alloc(ctx, set, @@ -5371,8 +5371,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return PTR_ERR(expr); expr_array[0] = expr; + num_exprs = 1; - if (set->exprs[0] && set->exprs[0]->ops != expr->ops) { + if (set->num_exprs && set->exprs[0]->ops != expr->ops) { err = -EOPNOTSUPP; goto err_set_elem_expr; } @@ -5381,12 +5382,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nlattr *tmp; int left; - if (set->num_exprs == 0) - return -EOPNOTSUPP; - i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_ELEM_EXPRESSIONS], left) { - if (i == set->num_exprs) { + if (i == NFT_SET_EXPR_MAX || + (set->num_exprs && set->num_exprs == i)) { err = -E2BIG; goto err_set_elem_expr; } @@ -5400,14 +5399,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_set_elem_expr; } expr_array[i] = expr; + num_exprs++; - if (expr->ops != set->exprs[i]->ops) { + if (set->num_exprs && expr->ops != set->exprs[i]->ops) { err = -EOPNOTSUPP; goto err_set_elem_expr; } i++; } - if (set->num_exprs != i) { + if (set->num_exprs && set->num_exprs != i) { err = -EOPNOTSUPP; goto err_set_elem_expr; } @@ -5415,6 +5415,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = nft_set_elem_expr_clone(ctx, set, 
expr_array); if (err < 0) goto err_set_elem_expr_clone; + + num_exprs = set->num_exprs; } err = nft_setelem_parse_key(ctx, set, &elem.key.val, @@ -5439,8 +5441,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } - if (set->num_exprs) { - for (i = 0; i < set->num_exprs; i++) + if (num_exprs) { + for (i = 0; i < num_exprs; i++) size += expr_array[i]->ops->size; nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS, @@ -5528,7 +5530,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, *nft_set_ext_obj(ext) = obj; obj->use++; } - for (i = 0; i < set->num_exprs; i++) + for (i = 0; i < num_exprs; i++) nft_set_elem_expr_setup(ext, i, expr_array); trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); @@ -5590,7 +5592,7 @@ err_parse_key_end: err_parse_key: nft_data_release(&elem.key.val, NFT_DATA_VALUE); err_set_elem_expr: - for (i = 0; i < set->num_exprs && expr_array[i]; i++) + for (i = 0; i < num_exprs && expr_array[i]; i++) nft_expr_destroy(ctx, expr_array[i]); err_set_elem_expr_clone: return err; @@ -8986,6 +8988,17 @@ int __nft_release_basechain(struct nft_ctx *ctx) } EXPORT_SYMBOL_GPL(__nft_release_basechain); +static void __nft_release_hooks(struct net *net) +{ + struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hook(net, table, chain); + } +} + static void __nft_release_tables(struct net *net) { struct nft_flowtable *flowtable, *nf; @@ -9001,10 +9014,6 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(table, nt, &net->nft.tables, list) { ctx.family = table->family; - - list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); - /* No packets are walking on these chains anymore. 
*/ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { ctx.chain = chain; @@ -9053,6 +9062,11 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +static void __net_exit nf_tables_pre_exit_net(struct net *net) +{ + __nft_release_hooks(net); +} + static void __net_exit nf_tables_exit_net(struct net *net) { mutex_lock(&net->nft.commit_mutex); @@ -9066,8 +9080,9 @@ static void __net_exit nf_tables_exit_net(struct net *net) } static struct pernet_operations nf_tables_net_ops = { - .init = nf_tables_init_net, - .exit = nf_tables_exit_net, + .init = nf_tables_init_net, + .pre_exit = nf_tables_pre_exit_net, + .exit = nf_tables_exit_net, }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 606411869698..0446307516cd 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -152,7 +152,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) /* * Drop entries with timestamps older then 'time'. */ -static void recent_entry_reap(struct recent_table *t, unsigned long time) +static void recent_entry_reap(struct recent_table *t, unsigned long time, + struct recent_entry *working, bool update) { struct recent_entry *e; @@ -162,6 +163,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time) e = list_entry(t->lru_list.next, struct recent_entry, lru_list); /* + * Do not reap the entry which are going to be updated. + */ + if (e == working && update) + return; + + /* * The last time stamp is the most recent. 
*/ if (time_after(time, e->stamps[e->index-1])) @@ -303,7 +310,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) - recent_entry_reap(t, time); + recent_entry_reap(t, time, e, + info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c992424e4d63..2d6fdf40df66 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; +static int genl_bind(struct net *net, int group) +{ + const struct genl_family *family; + unsigned int id; + int ret = 0; + + genl_lock_all(); + + idr_for_each_entry(&genl_fam_idr, family, id) { + const struct genl_multicast_group *grp; + int i; + + if (family->n_mcgrps == 0) + continue; + + i = group - family->mcgrp_offset; + if (i < 0 || i >= family->n_mcgrps) + continue; + + grp = &family->mcgrps[i]; + if ((grp->flags & GENL_UNS_ADMIN_PERM) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) + ret = -EPERM; + + break; + } + + genl_unlock_all(); + return ret; +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, }; /* we'll bump the group number right afterwards */ diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index 15ce9b642b25..b238c40a9984 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -80,6 +80,12 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; void *kbuf; + if (!len) + return -EINVAL; + + if (len > KMALLOC_MAX_SIZE) + return -ENOMEM; + kbuf = kzalloc(len, GFP_KERNEL); if (!kbuf) return -ENOMEM; diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index d706bb408365..0885b22e5c0e 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -8,6 +8,7 @@ config AF_RXRPC depends on INET select CRYPTO 
select KEYS + select NET_UDP_TUNNEL help Say Y or M here to include support for RxRPC session sockets (just the transport part, not the presentation part: (un)marshalling is diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c845594b663f..4eb91d958a48 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -548,8 +548,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); - - rxrpc_cleanup_ring(call); _leave(""); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 33b49367d575..a4111408ffd0 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -107,54 +107,44 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, */ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) { + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct sockaddr_rxrpc *srx = &local->srx; + struct udp_port_cfg udp_conf = {0}; struct sock *usk; int ret; _enter("%p{%d,%d}", - local, local->srx.transport_type, local->srx.transport.family); - - /* create a socket to represent the local endpoint */ - ret = sock_create_kern(net, local->srx.transport.family, - local->srx.transport_type, 0, &local->socket); + local, srx->transport_type, srx->transport.family); + + udp_conf.family = srx->transport.family; + if (udp_conf.family == AF_INET) { + udp_conf.local_ip = srx->transport.sin.sin_addr; + udp_conf.local_udp_port = srx->transport.sin.sin_port; +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) + } else { + udp_conf.local_ip6 = srx->transport.sin6.sin6_addr; + udp_conf.local_udp_port = srx->transport.sin6.sin6_port; +#endif + } + ret = udp_sock_create(net, &udp_conf, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; } + tuncfg.encap_type = UDP_ENCAP_RXRPC; + tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.sk_user_data = local; + setup_udp_tunnel_sock(net, local->socket, 
&tuncfg); + /* set the socket up */ usk = local->socket->sk; - inet_sk(usk)->mc_loop = 0; - - /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - inet_inc_convert_csum(usk); - - rcu_assign_sk_user_data(usk, local); - - udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; - udp_sk(usk)->encap_rcv = rxrpc_input_packet; - udp_sk(usk)->encap_destroy = NULL; - udp_sk(usk)->gro_receive = NULL; - udp_sk(usk)->gro_complete = NULL; - - udp_tunnel_encap_enable(local->socket); usk->sk_error_report = rxrpc_error_report; - /* if a local address was supplied then bind it */ - if (local->srx.transport_len > sizeof(sa_family_t)) { - _debug("bind"); - ret = kernel_bind(local->socket, - (struct sockaddr *)&local->srx.transport, - local->srx.transport_len); - if (ret < 0) { - _debug("bind failed %d", ret); - goto error; - } - } - - switch (local->srx.transport.family) { + switch (srx->transport.family) { case AF_INET6: /* we want to receive ICMPv6 errors */ - ip6_sock_set_recverr(local->socket->sk); + ip6_sock_set_recverr(usk); /* Fall through and set IPv4 options too otherwise we don't get * errors from IPv4 packets sent through the IPv6 socket. @@ -162,13 +152,13 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) fallthrough; case AF_INET: /* we want to receive ICMP errors */ - ip_sock_set_recverr(local->socket->sk); + ip_sock_set_recverr(usk); /* we want to set the don't fragment bit */ - ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO); + ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO); /* We want receive timestamps. 
*/ - sock_enable_timestamps(local->socket->sk); + sock_enable_timestamps(usk); break; default: @@ -177,15 +167,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _leave(" = 0"); return 0; - -error: - kernel_sock_shutdown(local->socket, SHUT_RDWR); - local->socket->sk->sk_user_data = NULL; - sock_release(local->socket); - local->socket = NULL; - - _leave(" = %d", ret); - return ret; } /* diff --git a/net/sctp/proc.c b/net/sctp/proc.c index f7da88ae20a5..982a87b3e11f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -215,6 +215,12 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + sctp_transport_walk_stop(&iter->hti); } @@ -222,6 +228,12 @@ static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; + if (v && v != SEQ_START_TOKEN) { + struct sctp_transport *transport = v; + + sctp_transport_put(transport); + } + ++*pos; return sctp_transport_get_next(seq_file_net(seq), &iter->hti); @@ -277,8 +289,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sk->sk_rcvbuf); seq_printf(seq, "\n"); - sctp_transport_put(transport); - return 0; } @@ -354,8 +364,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); } - sctp_transport_put(transport); - return 0; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c9766d07eb81..5a809c64dc7b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1113,14 +1113,15 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, unsigned int offset, len, remaining; struct bio_vec *bvec; - bvec = xdr->bvec; - offset = xdr->page_base; + bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT); + offset = offset_in_page(xdr->page_base); remaining = xdr->page_len; flags = MSG_MORE | 
MSG_SENDPAGE_NOTLAST; while (remaining > 0) { if (remaining <= PAGE_SIZE && tail->iov_len == 0) flags = 0; - len = min(remaining, bvec->bv_len); + + len = min(remaining, bvec->bv_len - offset); ret = kernel_sendpage(sock, bvec->bv_page, bvec->bv_offset + offset, len, flags); diff --git a/net/tls/Kconfig b/net/tls/Kconfig index fa0724fd84b4..0cdc1f7b6b08 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -21,6 +21,7 @@ config TLS_DEVICE bool "Transport Layer Security HW offload" depends on TLS select SOCK_VALIDATE_XMIT + select SOCK_RX_QUEUE_MAPPING default n help Enable kernel support for HW offload of the TLS protocol. diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6894f21dc147..5546710d8ac1 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -943,10 +943,12 @@ static int vsock_shutdown(struct socket *sock, int mode) */ sk = sock->sk; + + lock_sock(sk); if (sock->state == SS_UNCONNECTED) { err = -ENOTCONN; if (sk->sk_type == SOCK_STREAM) - return err; + goto out; } else { sock->state = SS_DISCONNECTING; err = 0; @@ -955,10 +957,8 @@ static int vsock_shutdown(struct socket *sock, int mode) /* Receive and send shutdowns are treated alike. 
*/ mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); if (mode) { - lock_sock(sk); sk->sk_shutdown |= mode; sk->sk_state_change(sk); - release_sock(sk); if (sk->sk_type == SOCK_STREAM) { sock_reset_flag(sk, SOCK_DONE); @@ -966,6 +966,8 @@ static int vsock_shutdown(struct socket *sock, int mode) } } +out: + release_sock(sk); return err; } @@ -1233,7 +1235,7 @@ static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) { const struct vsock_transport *transport = vsk->transport; - if (!transport->cancel_pkt) + if (!transport || !transport->cancel_pkt) return -EOPNOTSUPP; return transport->cancel_pkt(vsk); @@ -1243,7 +1245,6 @@ static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; - int cancel = 0; vsk = container_of(work, struct vsock_sock, connect_work.work); sk = sk_vsock(vsk); @@ -1254,11 +1255,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); - cancel = 1; + vsock_transport_cancel_pkt(vsk); } release_sock(sk); - if (cancel) - vsock_transport_cancel_pkt(vsk); sock_put(sk); } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 630b851f8150..cc3bae2659e7 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -474,14 +474,10 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) static int hvs_shutdown(struct vsock_sock *vsk, int mode) { - struct sock *sk = sk_vsock(vsk); - if (!(mode & SEND_SHUTDOWN)) return 0; - lock_sock(sk); hvs_shutdown_lock_held(vsk->trans, mode); - release_sock(sk); return 0; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 5956939eebb7..e4370b1b7494 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1130,8 +1130,6 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, vsk = vsock_sk(sk); - space_available = 
virtio_transport_space_update(sk, pkt); - lock_sock(sk); /* Check if sk has been closed before lock_sock */ @@ -1142,6 +1140,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + space_available = virtio_transport_space_update(sk, pkt); + /* Update CID in case it has changed after a transport reset event */ vsk->local_addr.svm_cid = dst.svm_cid; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 697cdcfbb5e1..495b1f5c979b 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -296,7 +296,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { @@ -564,6 +565,11 @@ static void xfrmi_dev_setup(struct net_device *dev) eth_broadcast_addr(dev->broadcast); } +#define XFRMI_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_GSO_SOFTWARE | \ + NETIF_F_HW_CSUM) + static int xfrmi_dev_init(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); @@ -581,6 +587,8 @@ static int xfrmi_dev_init(struct net_device *dev) } dev->features |= NETIF_F_LLTX; + dev->features |= XFRMI_FEATURES; + dev->hw_features |= XFRMI_FEATURES; if (phydev) { dev->needed_headroom = phydev->needed_headroom; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0727ac853b55..5a0ef4361e43 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2504,7 +2504,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), sizeof(*encap), GFP_KERNEL); if (!encap) - return 0; + return -ENOMEM; } err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); diff --git a/scripts/Makefile b/scripts/Makefile index b5418ec587fb..9de3c03b94aa 100644 --- a/scripts/Makefile +++ 
b/scripts/Makefile @@ -3,6 +3,9 @@ # scripts contains sources for various helper programs used throughout # the kernel for the build process. +CRYPTO_LIBS = $(shell pkg-config --libs libcrypto 2> /dev/null || echo -lcrypto) +CRYPTO_CFLAGS = $(shell pkg-config --cflags libcrypto 2> /dev/null) + hostprogs-always-$(CONFIG_BUILD_BIN2C) += bin2c hostprogs-always-$(CONFIG_KALLSYMS) += kallsyms hostprogs-always-$(BUILD_C_RECORDMCOUNT) += recordmcount @@ -14,8 +17,9 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include -HOSTLDLIBS_sign-file = -lcrypto -HOSTLDLIBS_extract-cert = -lcrypto +HOSTLDLIBS_sign-file = $(CRYPTO_LIBS) +HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) +HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) ifdef CONFIG_UNWINDER_ORC ifeq ($(ARCH),x86_64) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index 652e9542043f..dcd8d8750b8b 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright 2004 Matt Mackall <mpm@selenic.com> # diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 92e888ed939f..1afe3af1cc09 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3390,13 +3390,6 @@ sub process { } } -# discourage the use of boolean for type definition attributes of Kconfig options - if ($realfile =~ /Kconfig/ && - $line =~ /^\+\s*\bboolean\b/) { - WARN("CONFIG_TYPE_BOOLEAN", - "Use of boolean is deprecated, please use bool instead.\n" . 
$herecurr); - } - if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) && ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) { my $flag = $1; diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 19963708bcf8..8ddb5d099029 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # Copyright (C) Google LLC, 2018 diff --git a/scripts/clang-tools/run-clang-tools.py b/scripts/clang-tools/run-clang-tools.py index fa7655c7cec0..f754415af398 100755 --- a/scripts/clang-tools/run-clang-tools.py +++ b/scripts/clang-tools/run-clang-tools.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # Copyright (C) Google LLC, 2020 diff --git a/scripts/diffconfig b/scripts/diffconfig index 627eba5849b5..d5da5fa05d1d 100755 --- a/scripts/diffconfig +++ b/scripts/diffconfig @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # diffconfig - a tool to compare .config files. 
diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 33487e99d83e..5c113cad5601 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -75,16 +75,12 @@ if arg_contain -S "$@"; then fi fi -# For scripts/gcc-plugin.sh +# To set GCC_PLUGINS if arg_contain -print-file-name=plugin "$@"; then plugin_dir=$(mktemp -d) - sed -n 's/.*#include "\(.*\)"/\1/p' $(dirname $0)/../gcc-plugins/gcc-common.h | - while read header - do - mkdir -p $plugin_dir/include/$(dirname $header) - touch $plugin_dir/include/$header - done + mkdir -p $plugin_dir/include + touch $plugin_dir/include/plugin-version.h echo $plugin_dir exit 0 diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 0fdb31a790a8..48d141e3ec56 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ # # This determines how many parallel tasks "make" is expecting, as it is diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 7ecd2ccba531..54ad86d13784 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -112,6 +112,12 @@ static bool is_ignored_symbol(const char *name, char type) "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__AArch64ADRPThunk_", /* arm64 lld */ + "__ARMV5PILongThunk_", /* arm lld */ + "__ARMV7PILongThunk_", + "__ThumbV7PILongThunk_", + "__LA25Thunk_", /* mips lld */ + "__microLA25Thunk_", NULL }; diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index fcd4acd4e9cb..b520e407a8eb 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -35,7 +35,7 @@ fi # As a final fallback before giving up, check if $HOSTCC knows of a default # ncurses installation (e.g. from a vendor-specific sysroot). 
-if echo '#include <ncurses.h>' | "${HOSTCC}" -E - >/dev/null 2>&1; then +if echo '#include <ncurses.h>' | ${HOSTCC} -E - >/dev/null 2>&1; then echo cflags=\"-D_GNU_SOURCE\" echo libs=\"-lncurses\" exit 0 diff --git a/security/commoncap.c b/security/commoncap.c index bacc1111d871..26c1cb725dcb 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, { int size, ret; kuid_t kroot; + u32 nsmagic, magic; uid_t root, mappedroot; char *tmpbuf = NULL; struct vfs_cap_data *cap; - struct vfs_ns_cap_data *nscap; + struct vfs_ns_cap_data *nscap = NULL; struct dentry *dentry; struct user_namespace *fs_ns; @@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; if (is_v2header((size_t) ret, cap)) { - /* If this is sizeof(vfs_cap_data) then we're ok with the - * on-disk value, so return that. */ - if (alloc) - *buffer = tmpbuf; - else - kfree(tmpbuf); - return ret; - } else if (!is_v3header((size_t) ret, cap)) { - kfree(tmpbuf); - return -EINVAL; + root = 0; + } else if (is_v3header((size_t) ret, cap)) { + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + } else { + size = -EINVAL; + goto out_free; } - nscap = (struct vfs_ns_cap_data *) tmpbuf; - root = le32_to_cpu(nscap->rootid); kroot = make_kuid(fs_ns, root); /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. 
*/ mappedroot = from_kuid(current_user_ns(), kroot); if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + size = sizeof(struct vfs_ns_cap_data); if (alloc) { - *buffer = tmpbuf; + if (!nscap) { + /* v2 -> v3 conversion */ + nscap = kzalloc(size, GFP_ATOMIC); + if (!nscap) { + size = -ENOMEM; + goto out_free; + } + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + nscap->magic_etc = cpu_to_le32(nsmagic); + } else { + /* use allocated v3 buffer */ + tmpbuf = NULL; + } nscap->rootid = cpu_to_le32(mappedroot); - } else - kfree(tmpbuf); - return size; + *buffer = nscap; + } + goto out_free; } if (!rootid_owns_currentns(kroot)) { - kfree(tmpbuf); - return -EOPNOTSUPP; + size = -EOVERFLOW; + goto out_free; } /* This comes from a parent namespace. Return as a v2 capability */ size = sizeof(struct vfs_cap_data); if (alloc) { - *buffer = kmalloc(size, GFP_ATOMIC); - if (*buffer) { - struct vfs_cap_data *cap = *buffer; - __le32 nsmagic, magic; + if (nscap) { + /* v3 -> v2 conversion */ + cap = kzalloc(size, GFP_ATOMIC); + if (!cap) { + size = -ENOMEM; + goto out_free; + } magic = VFS_CAP_REVISION_2; nsmagic = le32_to_cpu(nscap->magic_etc); if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) @@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); cap->magic_etc = cpu_to_le32(magic); } else { - size = -ENOMEM; + /* use unconverted v2 */ + tmpbuf = NULL; } + *buffer = cap; } +out_free: kfree(tmpbuf); return size; } diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 389ea5209a83..a7c4f0772e53 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long 
long *counterp) int get_epb(int cpu) { char path[128 + PATH_BYTES]; + unsigned long long msr; int ret, epb = -1; FILE *fp; sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); - fp = fopen_or_die(path, "r"); + fp = fopen(path, "r"); + if (!fp) + goto msr_fallback; ret = fscanf(fp, "%d", &epb); if (ret != 1) @@ -1848,6 +1851,11 @@ int get_epb(int cpu) fclose(fp); return epb; + +msr_fallback: + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + return msr & 0xf; } void get_apic_id(struct thread_data *t) diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index cac891028cd1..3e3a5f518864 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,7 +12,8 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); - BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + if (IS_ENABLED(CONFIG_ACPI_NFIT)) + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); } diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild index 75baebf8f4ba..197bcb2b7f35 100644 --- a/tools/testing/nvdimm/test/Kbuild +++ b/tools/testing/nvdimm/test/Kbuild @@ -5,5 +5,9 @@ ccflags-y += -I$(srctree)/drivers/acpi/nfit/ obj-m += nfit_test.o obj-m += nfit_test_iomap.o -nfit_test-y := nfit.o +ifeq ($(CONFIG_ACPI_NFIT),m) + nfit_test-y := nfit.o +else + nfit_test-y := ndtest.o +endif nfit_test_iomap-y := iomap.o diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c new file mode 100644 index 000000000000..6862915f1fb0 --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.c @@ -0,0 +1,1129 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/genalloc.h> 
+#include <linux/vmalloc.h> +#include <linux/dma-mapping.h> +#include <linux/list_sort.h> +#include <linux/libnvdimm.h> +#include <linux/ndctl.h> +#include <nd-core.h> +#include <linux/printk.h> +#include <linux/seq_buf.h> + +#include "../watermark.h" +#include "nfit_test.h" +#include "ndtest.h" + +enum { + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + NUM_INSTANCES = 2, + NUM_DCR = 4, + NDTEST_MAX_MAPPING = 6, +}; + +#define NDTEST_SCM_DIMM_CMD_MASK \ + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ + (1ul << ND_CMD_GET_CONFIG_DATA) | \ + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static DEFINE_SPINLOCK(ndtest_lock); +static struct ndtest_priv *instances[NUM_INSTANCES]; +static struct class *ndtest_dimm_class; +static struct gen_pool *ndtest_pool; + +static struct ndtest_dimm dimm_group1[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + .uuid_str = "1e5c75d2-b618-11ea-9aa3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + .uuid_str = "1c4d43ac-b618-11ea-be80-507b9ddc0f72", + .physical_id = 1, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + .uuid_str = "a9f17ffc-b618-11ea-b36d-507b9ddc0f72", + .physical_id = 2, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), + .uuid_str = "b6b83b22-b618-11ea-8aae-507b9ddc0f72", + .physical_id = 3, + .num_formats = 2, + }, + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), + .uuid_str = "bf9baaee-b618-11ea-b181-507b9ddc0f72", + .physical_id = 4, + .num_formats = 2, + }, +}; + +static struct ndtest_dimm dimm_group2[] = { + { + .size = DIMM_SIZE, + .handle = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0), + .uuid_str = 
"ca0817e2-b618-11ea-9db3-507b9ddc0f72", + .physical_id = 0, + .num_formats = 1, + .flags = PAPR_PMEM_UNARMED | PAPR_PMEM_EMPTY | + PAPR_PMEM_SAVE_FAILED | PAPR_PMEM_SHUTDOWN_DIRTY | + PAPR_PMEM_HEALTH_FATAL, + }, +}; + +static struct ndtest_mapping region0_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = 0, + .size = SZ_16M, + } +}; + +static struct ndtest_mapping region1_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 1, + .position = 1, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 2, + .position = 2, + .start = SZ_16M, + .size = SZ_16M, + }, + { + .dimm = 3, + .position = 3, + .start = SZ_16M, + .size = SZ_16M, + }, +}; + +static struct ndtest_mapping region2_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_mapping region3_mapping[] = { + { + .dimm = 1, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region4_mapping[] = { + { + .dimm = 2, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_mapping region5_mapping[] = { + { + .dimm = 3, + .start = 0, + .size = DIMM_SIZE, + } +}; + +static struct ndtest_region bus0_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region0_mapping), + .mapping = region0_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, + { + .type = ND_DEVICE_NAMESPACE_PMEM, + .num_mappings = ARRAY_SIZE(region1_mapping), + .mapping = region1_mapping, + .size = DIMM_SIZE * 2, + .range_index = 2, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region2_mapping), + .mapping = region2_mapping, + .size = DIMM_SIZE, + .range_index = 3, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region3_mapping), + .mapping = region3_mapping, + .size = DIMM_SIZE, + .range_index = 4, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + 
.num_mappings = ARRAY_SIZE(region4_mapping), + .mapping = region4_mapping, + .size = DIMM_SIZE, + .range_index = 5, + }, + { + .type = ND_DEVICE_NAMESPACE_BLK, + .num_mappings = ARRAY_SIZE(region5_mapping), + .mapping = region5_mapping, + .size = DIMM_SIZE, + .range_index = 6, + }, +}; + +static struct ndtest_mapping region6_mapping[] = { + { + .dimm = 0, + .position = 0, + .start = 0, + .size = DIMM_SIZE, + }, +}; + +static struct ndtest_region bus1_regions[] = { + { + .type = ND_DEVICE_NAMESPACE_IO, + .num_mappings = ARRAY_SIZE(region6_mapping), + .mapping = region6_mapping, + .size = DIMM_SIZE, + .range_index = 1, + }, +}; + +static struct ndtest_config bus_configs[NUM_INSTANCES] = { + /* bus 1 */ + { + .dimm_start = 0, + .dimm_count = ARRAY_SIZE(dimm_group1), + .dimms = dimm_group1, + .regions = bus0_regions, + .num_regions = ARRAY_SIZE(bus0_regions), + }, + /* bus 2 */ + { + .dimm_start = ARRAY_SIZE(dimm_group1), + .dimm_count = ARRAY_SIZE(dimm_group2), + .dimms = dimm_group2, + .regions = bus1_regions, + .num_regions = ARRAY_SIZE(bus1_regions), + }, +}; + +static inline struct ndtest_priv *to_ndtest_priv(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct ndtest_priv, pdev); +} + +static int ndtest_config_get(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_get_config_data_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + hdr->status = 0; + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(hdr->out_buf, p->label_area + hdr->in_offset, len); + + return buf_len - len; +} + +static int ndtest_config_set(struct ndtest_dimm *p, unsigned int buf_len, + struct nd_cmd_set_config_hdr *hdr) +{ + unsigned int len; + + if ((hdr->in_offset + hdr->in_length) > LABEL_SIZE) + return -EINVAL; + + len = min(hdr->in_length, LABEL_SIZE - hdr->in_offset); + memcpy(p->label_area + hdr->in_offset, hdr->in_buf, len); + + return 
buf_len - len; +} + +static int ndtest_get_config_size(struct ndtest_dimm *dimm, unsigned int buf_len, + struct nd_cmd_get_config_size *size) +{ + size->status = 0; + size->max_xfer = 8; + size->config_size = dimm->config_size; + + return 0; +} + +static int ndtest_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + struct ndtest_dimm *dimm; + int _cmd_rc; + + if (!cmd_rc) + cmd_rc = &_cmd_rc; + + *cmd_rc = 0; + + if (!nvdimm) + return -EINVAL; + + dimm = nvdimm_provider_data(nvdimm); + if (!dimm) + return -EINVAL; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + *cmd_rc = ndtest_get_config_size(dimm, buf_len, buf); + break; + case ND_CMD_GET_CONFIG_DATA: + *cmd_rc = ndtest_config_get(dimm, buf_len, buf); + break; + case ND_CMD_SET_CONFIG_DATA: + *cmd_rc = ndtest_config_set(dimm, buf_len, buf); + break; + default: + return -EINVAL; + } + + /* Failures for a DIMM can be injected using fail_cmd and + * fail_cmd_code, see the device attributes below + */ + if ((1 << cmd) & dimm->fail_cmd) + return dimm->fail_cmd_code ? 
dimm->fail_cmd_code : -EIO; + + return 0; +} + +static int ndtest_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct ndtest_dimm *dimm = ndbr->blk_provider_data; + struct ndtest_blk_mmio *mmio = dimm->mmio; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + if (!mmio) + return -ENOMEM; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->base + dpa, iobuf, len); + else { + memcpy(iobuf, mmio->base + dpa, len); + arch_invalidate_pmem(mmio->base + dpa, len); + } + + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int ndtest_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nvdimm *nvdimm; + struct ndtest_dimm *dimm; + struct ndtest_blk_mmio *mmio; + + nvdimm = nd_blk_region_to_dimm(ndbr); + dimm = nvdimm_provider_data(nvdimm); + + nd_blk_region_set_provider_data(ndbr, dimm); + dimm->blk_region = to_nd_region(dev); + + mmio = devm_kzalloc(dev, sizeof(struct ndtest_blk_mmio), GFP_KERNEL); + if (!mmio) + return -ENOMEM; + + mmio->base = (void __iomem *) devm_nvdimm_memremap( + dev, dimm->address, 12, nd_blk_memremap_flags(ndbr)); + if (!mmio->base) { + dev_err(dev, "%s failed to map blk dimm\n", nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = dimm->size; + mmio->base_offset = 0; + + dimm->mmio = mmio; + + return 0; +} + +static struct nfit_test_resource *ndtest_resource_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct ndtest_priv *t = instances[i]; + + if (!t) + continue; + spin_lock(&ndtest_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res.start && (addr < n->res.start + + resource_size(&n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(&n->res))) { + nfit_res 
= n; + break; + } + } + spin_unlock(&ndtest_lock); + if (nfit_res) + return nfit_res; + } + + pr_warn("Failed to get resource\n"); + + return NULL; +} + +static void ndtest_release_resource(void *data) +{ + struct nfit_test_resource *res = data; + + spin_lock(&ndtest_lock); + list_del(&res->list); + spin_unlock(&ndtest_lock); + + if (resource_size(&res->res) >= DIMM_SIZE) + gen_pool_free(ndtest_pool, res->res.start, + resource_size(&res->res)); + vfree(res->buf); + kfree(res); +} + +static void *ndtest_alloc_resource(struct ndtest_priv *p, size_t size, + dma_addr_t *dma) +{ + dma_addr_t __dma; + void *buf; + struct nfit_test_resource *res; + struct genpool_data_align data = { + .align = SZ_128M, + }; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + buf = vmalloc(size); + if (size >= DIMM_SIZE) + __dma = gen_pool_alloc_algo(ndtest_pool, size, + gen_pool_first_fit_align, &data); + else + __dma = (unsigned long) buf; + + if (!__dma) + goto buf_err; + + INIT_LIST_HEAD(&res->list); + res->dev = &p->pdev.dev; + res->buf = buf; + res->res.start = __dma; + res->res.end = __dma + size - 1; + res->res.name = "NFIT"; + spin_lock_init(&res->lock); + INIT_LIST_HEAD(&res->requests); + spin_lock(&ndtest_lock); + list_add(&res->list, &p->resources); + spin_unlock(&ndtest_lock); + + if (dma) + *dma = __dma; + + if (!devm_add_action(&p->pdev.dev, ndtest_release_resource, res)) + return res->buf; + +buf_err: + if (__dma && size >= DIMM_SIZE) + gen_pool_free(ndtest_pool, __dma, size); + if (buf) + vfree(buf); + kfree(res); + + return NULL; +} + +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct ndtest_region *region = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", region->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *ndtest_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + 
+static const struct attribute_group ndtest_region_attribute_group = { + .name = "papr", + .attrs = ndtest_region_attributes, +}; + +static const struct attribute_group *ndtest_region_attribute_groups[] = { + &ndtest_region_attribute_group, + NULL, +}; + +static int ndtest_create_region(struct ndtest_priv *p, + struct ndtest_region *region) +{ + struct nd_mapping_desc mappings[NDTEST_MAX_MAPPING]; + struct nd_blk_region_desc ndbr_desc; + struct nd_interleave_set *nd_set; + struct nd_region_desc *ndr_desc; + struct resource res; + int i, ndimm = region->mapping[0].dimm; + u64 uuid[2]; + + memset(&res, 0, sizeof(res)); + memset(&mappings, 0, sizeof(mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + ndr_desc = &ndbr_desc.ndr_desc; + + if (!ndtest_alloc_resource(p, region->size, &res.start)) + return -ENOMEM; + + res.end = res.start + region->size - 1; + ndr_desc->mapping = mappings; + ndr_desc->res = &res; + ndr_desc->provider_data = region; + ndr_desc->attr_groups = ndtest_region_attribute_groups; + + if (uuid_parse(p->config->dimms[ndimm].uuid_str, (uuid_t *)uuid)) { + pr_err("failed to parse UUID\n"); + return -ENXIO; + } + + nd_set = devm_kzalloc(&p->pdev.dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + nd_set->cookie1 = cpu_to_le64(uuid[0]); + nd_set->cookie2 = cpu_to_le64(uuid[1]); + nd_set->altcookie = nd_set->cookie1; + ndr_desc->nd_set = nd_set; + + if (region->type == ND_DEVICE_NAMESPACE_BLK) { + mappings[0].start = 0; + mappings[0].size = DIMM_SIZE; + mappings[0].nvdimm = p->config->dimms[ndimm].nvdimm; + + ndr_desc->mapping = &mappings[0]; + ndr_desc->num_mappings = 1; + ndr_desc->num_lanes = 1; + ndbr_desc.enable = ndtest_blk_region_enable; + ndbr_desc.do_io = ndtest_blk_do_io; + region->region = nvdimm_blk_region_create(p->bus, ndr_desc); + + goto done; + } + + for (i = 0; i < region->num_mappings; i++) { + ndimm = region->mapping[i].dimm; + mappings[i].start = region->mapping[i].start; + mappings[i].size = 
region->mapping[i].size; + mappings[i].position = region->mapping[i].position; + mappings[i].nvdimm = p->config->dimms[ndimm].nvdimm; + } + + ndr_desc->num_mappings = region->num_mappings; + region->region = nvdimm_pmem_region_create(p->bus, ndr_desc); + +done: + if (!region->region) { + dev_err(&p->pdev.dev, "Error registering region %pR\n", + ndr_desc->res); + return -ENXIO; + } + + return 0; +} + +static int ndtest_init_regions(struct ndtest_priv *p) +{ + int i, ret = 0; + + for (i = 0; i < p->config->num_regions; i++) { + ret = ndtest_create_region(p, &p->config->regions[i]); + if (ret) + return ret; + } + + return 0; +} + +static void put_dimms(void *data) +{ + struct ndtest_priv *p = data; + int i; + + for (i = 0; i < p->config->dimm_count; i++) + if (p->config->dimms[i].dev) { + device_unregister(p->config->dimms[i].dev); + p->config->dimms[i].dev = NULL; + } +} + +static ssize_t handle_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%#x\n", dimm->fail_cmd); +} + +static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd = val; + + return size; +} +static DEVICE_ATTR_RW(fail_cmd); + +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ndtest_dimm *dimm = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", dimm->fail_cmd_code); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + 
struct ndtest_dimm *dimm = dev_get_drvdata(dev); + unsigned long val; + ssize_t rc; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm->fail_cmd_code = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + +static struct attribute *dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, + NULL, +}; + +static struct attribute_group dimm_attribute_group = { + .attrs = dimm_attributes, +}; + +static const struct attribute_group *dimm_attribute_groups[] = { + &dimm_attribute_group, + NULL, +}; + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x1234567\n"); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%04x-%02x-%04x-%08x", 0xabcd, + 0xa, 2016, ~(dimm->handle)); +} +static DEVICE_ATTR_RO(id); + +static ssize_t nvdimm_handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%#x\n", dimm->handle); +} + +static struct device_attribute dev_attr_nvdimm_show_handle = { + .attr = { .name = "handle", .mode = 0444 }, + .show = nvdimm_handle_show, +}; + +static ssize_t subsystem_vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "0x%04x\n", 0); +} +static DEVICE_ATTR_RO(subsystem_vendor); + +static ssize_t dirty_shutdown_show(struct device *dev, + struct 
device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 42); +} +static DEVICE_ATTR_RO(dirty_shutdown); + +static ssize_t formats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + return sprintf(buf, "%d\n", dimm->num_formats); +} +static DEVICE_ATTR_RO(formats); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (dimm->num_formats > 1) + return sprintf(buf, "0x201\n"); + + return sprintf(buf, "0x101\n"); +} +static DEVICE_ATTR_RO(format); + +static ssize_t format1_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x301\n"); +} +static DEVICE_ATTR_RO(format1); + +static umode_t ndtest_nvdimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + + if (a == &dev_attr_format1.attr && dimm->num_formats <= 1) + return 0; + + return a->mode; +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct ndtest_dimm *dimm = nvdimm_provider_data(nvdimm); + struct seq_buf s; + u64 flags; + + flags = dimm->flags; + + seq_buf_init(&s, buf, PAGE_SIZE); + if (flags & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (flags & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (flags & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (flags & PAPR_PMEM_SAVE_MASK) + seq_buf_printf(&s, "save_fail "); + + if (flags & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + + if (seq_buf_used(&s)) + 
seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +static DEVICE_ATTR_RO(flags); + +static struct attribute *ndtest_nvdimm_attributes[] = { + &dev_attr_nvdimm_show_handle.attr, + &dev_attr_vendor.attr, + &dev_attr_id.attr, + &dev_attr_phys_id.attr, + &dev_attr_subsystem_vendor.attr, + &dev_attr_dirty_shutdown.attr, + &dev_attr_formats.attr, + &dev_attr_format.attr, + &dev_attr_format1.attr, + &dev_attr_flags.attr, + NULL, +}; + +static const struct attribute_group ndtest_nvdimm_attribute_group = { + .name = "papr", + .attrs = ndtest_nvdimm_attributes, + .is_visible = ndtest_nvdimm_attr_visible, +}; + +static const struct attribute_group *ndtest_nvdimm_attribute_groups[] = { + &ndtest_nvdimm_attribute_group, + NULL, +}; + +static int ndtest_dimm_register(struct ndtest_priv *priv, + struct ndtest_dimm *dimm, int id) +{ + struct device *dev = &priv->pdev.dev; + unsigned long dimm_flags = dimm->flags; + + if (dimm->num_formats > 1) { + set_bit(NDD_ALIASING, &dimm_flags); + set_bit(NDD_LABELING, &dimm_flags); + } + + if (dimm->flags & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + + dimm->nvdimm = nvdimm_create(priv->bus, dimm, + ndtest_nvdimm_attribute_groups, dimm_flags, + NDTEST_SCM_DIMM_CMD_MASK, 0, NULL); + if (!dimm->nvdimm) { + dev_err(dev, "Error creating DIMM object for %pOF\n", priv->dn); + return -ENXIO; + } + + dimm->dev = device_create_with_groups(ndtest_dimm_class, + &priv->pdev.dev, + 0, dimm, dimm_attribute_groups, + "test_dimm%d", id); + if (!dimm->dev) { + pr_err("Could not create dimm device attributes\n"); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_nvdimm_init(struct ndtest_priv *p) +{ + struct ndtest_dimm *d; + void *res; + int i, id; + + for (i = 0; i < p->config->dimm_count; i++) { + d = &p->config->dimms[i]; + d->id = id = p->config->dimm_start + i; + res = ndtest_alloc_resource(p, LABEL_SIZE, NULL); + if (!res) + return -ENOMEM; + + d->label_area = res; + sprintf(d->label_area, "label%d", id); + 
d->config_size = LABEL_SIZE; + + if (!ndtest_alloc_resource(p, d->size, + &p->dimm_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->label_dma[id])) + return -ENOMEM; + + if (!ndtest_alloc_resource(p, LABEL_SIZE, + &p->dcr_dma[id])) + return -ENOMEM; + + d->address = p->dimm_dma[id]; + + ndtest_dimm_register(p, d, id); + } + + return 0; +} + +static ssize_t compatible_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "nvdimm_test"); +} +static DEVICE_ATTR_RO(compatible); + +static struct attribute *of_node_attributes[] = { + &dev_attr_compatible.attr, + NULL +}; + +static const struct attribute_group of_node_attribute_group = { + .name = "of_node", + .attrs = of_node_attributes, +}; + +static const struct attribute_group *ndtest_attribute_groups[] = { + &of_node_attribute_group, + NULL, +}; + +static int ndtest_bus_register(struct ndtest_priv *p) +{ + p->config = &bus_configs[p->pdev.id]; + + p->bus_desc.ndctl = ndtest_ctl; + p->bus_desc.module = THIS_MODULE; + p->bus_desc.provider_name = NULL; + p->bus_desc.attr_groups = ndtest_attribute_groups; + + p->bus = nvdimm_bus_register(&p->pdev.dev, &p->bus_desc); + if (!p->bus) { + dev_err(&p->pdev.dev, "Error creating nvdimm bus %pOF\n", p->dn); + return -ENOMEM; + } + + return 0; +} + +static int ndtest_remove(struct platform_device *pdev) +{ + struct ndtest_priv *p = to_ndtest_priv(&pdev->dev); + + nvdimm_bus_unregister(p->bus); + return 0; +} + +static int ndtest_probe(struct platform_device *pdev) +{ + struct ndtest_priv *p; + int rc; + + p = to_ndtest_priv(&pdev->dev); + if (ndtest_bus_register(p)) + return -ENOMEM; + + p->dcr_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->label_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR, + sizeof(dma_addr_t), GFP_KERNEL); + + rc = ndtest_nvdimm_init(p); + if (rc) + goto err; + + rc = 
ndtest_init_regions(p); + if (rc) + goto err; + + rc = devm_add_action_or_reset(&pdev->dev, put_dimms, p); + if (rc) + goto err; + + platform_set_drvdata(pdev, p); + + return 0; + +err: + pr_err("%s:%d Failed nvdimm init\n", __func__, __LINE__); + return rc; +} + +static const struct platform_device_id ndtest_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver ndtest_driver = { + .probe = ndtest_probe, + .remove = ndtest_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = ndtest_id, +}; + +static void ndtest_release(struct device *dev) +{ + struct ndtest_priv *p = to_ndtest_priv(dev); + + kfree(p); +} + +static void cleanup_devices(void) +{ + int i; + + for (i = 0; i < NUM_INSTANCES; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + + nfit_test_teardown(); + + if (ndtest_pool) + gen_pool_destroy(ndtest_pool); + + + if (ndtest_dimm_class) + class_destroy(ndtest_dimm_class); +} + +static __init int ndtest_init(void) +{ + int rc, i; + + pmem_test(); + libnvdimm_test(); + device_dax_test(); + dax_pmem_test(); + dax_pmem_core_test(); +#ifdef CONFIG_DEV_DAX_PMEM_COMPAT + dax_pmem_compat_test(); +#endif + + nfit_test_setup(ndtest_resource_lookup, NULL); + + ndtest_dimm_class = class_create(THIS_MODULE, "nfit_test_dimm"); + if (IS_ERR(ndtest_dimm_class)) { + rc = PTR_ERR(ndtest_dimm_class); + goto err_register; + } + + ndtest_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!ndtest_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(ndtest_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + + /* Each instance can be taken as a bus, which can have multiple dimms */ + for (i = 0; i < NUM_INSTANCES; i++) { + struct ndtest_priv *priv; + struct platform_device *pdev; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + rc = -ENOMEM; + goto err_register; + } + + INIT_LIST_HEAD(&priv->resources); + pdev = &priv->pdev; + pdev->name = KBUILD_MODNAME; + 
pdev->id = i; + pdev->dev.release = ndtest_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + get_device(&pdev->dev); + + instances[i] = priv; + } + + rc = platform_driver_register(&ndtest_driver); + if (rc) + goto err_register; + + return 0; + +err_register: + pr_err("Error registering platform device\n"); + cleanup_devices(); + + return rc; +} + +static __exit void ndtest_exit(void) +{ + cleanup_devices(); + platform_driver_unregister(&ndtest_driver); +} + +module_init(ndtest_init); +module_exit(ndtest_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h new file mode 100644 index 000000000000..2c54c9cbb90c --- /dev/null +++ b/tools/testing/nvdimm/test/ndtest.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef NDTEST_H +#define NDTEST_H + +#include <linux/platform_device.h> +#include <linux/libnvdimm.h> + +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10)) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health 
bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED) + +struct ndtest_config; + +struct ndtest_priv { + struct platform_device pdev; + struct device_node *dn; + struct list_head resources; + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; + struct ndtest_config *config; + + dma_addr_t *dcr_dma; + dma_addr_t *label_dma; + dma_addr_t *dimm_dma; +}; + +struct ndtest_blk_mmio { + void __iomem *base; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; +}; + +struct ndtest_dimm { + struct device *dev; + struct nvdimm *nvdimm; + struct ndtest_blk_mmio *mmio; + struct nd_region *blk_region; + + dma_addr_t address; + unsigned long long flags; + unsigned long config_size; + void *label_area; + char *uuid_str; + + unsigned int size; + unsigned int handle; + unsigned int fail_cmd; + unsigned int physical_id; + unsigned int num_formats; + int id; + int fail_cmd_code; + u8 no_alias; +}; + +struct ndtest_mapping { + u64 start; + u64 size; + u8 position; + u8 dimm; +}; + +struct ndtest_region { + struct nd_region *region; + struct ndtest_mapping *mapping; + u64 size; + u8 type; + u8 num_mappings; + u8 range_index; +}; + +struct ndtest_config { + struct ndtest_dimm *dimms; + struct ndtest_region *regions; + unsigned int dimm_count; + unsigned int dimm_start; + u8 num_regions; +}; + +#endif /* NDTEST_H */ diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 7065163a8388..537d65968c48 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -6,6 +6,7 @@ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include 
<unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -35,7 +36,7 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u64 expansion[10]; /* For future use */ + __u8 expansion[84]; /* For future use */ }; int main(int argc, char **argv) @@ -102,6 +103,7 @@ int main(int argc, char **argv) exit(1); } + memset(&map, 0, sizeof(map)); map.seconds = seconds; map.threads = threads; map.node = node; diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh index 16a9dd43aefc..8d91191a098c 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -7,9 +7,11 @@ ALL_TESTS=" ipv4_route_addition_test ipv4_route_deletion_test ipv4_route_replacement_test + ipv4_route_offload_failed_test ipv6_route_addition_test ipv6_route_deletion_test ipv6_route_replacement_test + ipv6_route_offload_failed_test " NETDEVSIM_PATH=/sys/bus/netdevsim/ @@ -17,9 +19,26 @@ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh +check_rt_offload_failed() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without + # RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED + # flag + head -n 1 $outfile | grep -q "rt_offload_failed" + if [[ $? 
-eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed" +} + check_rt_trap() { local outfile=$1; shift @@ -39,15 +58,23 @@ route_notify_check() { local outfile=$1; shift local expected_num_lines=$1; shift + local offload_failed=${1:-0}; shift # check the monitor results lines=`wc -l $outfile | cut "-d " -f1` test $lines -eq $expected_num_lines check_err $? "$expected_num_lines notifications were expected but $lines were received" - if [[ $expected_num_lines -eq 2 ]]; then + if [[ $expected_num_lines -eq 1 ]]; then + return + fi + + if [[ $offload_failed -eq 0 ]]; then check_rt_trap $outfile check_err $? "Wrong RTM_F_TRAP flags in notifications" + else + check_rt_offload_failed $outfile + check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications" fi } @@ -57,6 +84,7 @@ route_addition_check() local notify=$1; shift local route=$1; shift local expected_num_notifications=$1; shift + local offload_failed=${1:-0}; shift ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify @@ -68,7 +96,7 @@ route_addition_check() sleep 1 kill %% && wait %% &> /dev/null - route_notify_check $outfile $expected_num_notifications + route_notify_check $outfile $expected_num_notifications $offload_failed rm -f $outfile $IP route del $route dev dummy1 @@ -93,6 +121,13 @@ ipv4_route_addition_test() expected_num_notifications=2 route_addition_check $ip $notify $route $expected_num_notifications + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. 
+ notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + log_test "IPv4 route addition" } @@ -185,11 +220,55 @@ ipv4_route_replacement_test() expected_num_notifications=2 route_replacement_check $ip $notify $route $expected_num_notifications + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + $IP link del name dummy2 log_test "IPv4 route replacement" } +ipv4_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. + notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? 
"Failed to setup route offload not to fail" + + log_test "IPv4 route offload failed" +} + ipv6_route_addition_test() { RET=0 @@ -208,6 +287,13 @@ ipv6_route_addition_test() expected_num_notifications=2 route_addition_check $ip $notify $route $expected_num_notifications + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. + notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + log_test "IPv6 route addition" } @@ -250,11 +336,55 @@ ipv6_route_replacement_test() expected_num_notifications=2 route_replacement_check $ip $notify $route $expected_num_notifications + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + $IP link del name dummy2 log_test "IPv6 route replacement" } +ipv6_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. 
+ notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload not to fail" + + log_test "IPv6 route offload failed" +} + setup_prepare() { modprobe netdevsim &> /dev/null diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 2cfd87d94db8..10a030b53b23 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -128,6 +128,7 @@ cleanup() local netns for netns in "$ns1" "$ns2" "$ns3" "$ns4";do ip netns del $netns + rm -f /tmp/$netns.{nstat,out} done } @@ -333,6 +334,21 @@ do_ping() return 0 } +# $1: ns, $2: MIB counter +get_mib_counter() +{ + local listener_ns="${1}" + local mib="${2}" + + # strip the header + ip netns exec "${listener_ns}" \ + nstat -z -a "${mib}" | \ + tail -n+2 | \ + while read a count c rest; do + echo $count + done +} + # $1: ns, $2: port wait_local_port_listen() { @@ -409,10 +425,10 @@ do_transfer() sleep 1 fi - local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") + local 
stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & local spid=$! @@ -438,16 +454,26 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + fi + local duration duration=$((stop-start)) - duration=$(printf "(duration %05sms)" $duration) + printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 + echo "[ FAIL ] client exit code $retc, server $rets" 1>&2 echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + echo cat "$capout" return 1 fi @@ -457,11 +483,10 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? 
- local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - - local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -473,37 +498,50 @@ do_transfer() expect_synrx=$((stat_synrx_last_l+1)) expect_ackrx=$((stat_ackrx_last_l+1)) fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then + printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \ + "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 + retc=1 + fi + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + fi + + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + printf "[ OK ]" + fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then - echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance" + printf " WARN: CookieSent: did not advance" fi if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance" + printf " WARN: CookieRecv: did not advance" fi else if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then - echo 
"${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed" + printf " WARN: CookieSent: changed" fi if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed" + printf " WARN: CookieRecv: changed" fi fi - if [ $expect_synrx -ne $stat_synrx_now_l ] ;then - echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \ + "${expect_synrx}" "${stat_synrx_now_l}" fi - if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then - echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" - fi - - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - echo "$duration [ OK ]" - cat "$capout" - return 0 + if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \ + "${expect_ackrx}" "${stat_ackrx_now_l}" fi + echo cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 3155fbbf644b..b4cca382d125 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -196,7 +196,7 @@ static void do_recv_errqueue_timeout(int fdt) default: error(1, 0, "errqueue: errno %u code %u\n", err->ee_errno, err->ee_code); - }; + } tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; tstamp -= (int64_t) glob_tstart; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e0088c2d38a5..426d07875a48 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -133,7 +133,10 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) FIXTURE_SETUP(tls) { - union tls_crypto_context tls12; + union { + struct tls12_crypto_info_aes_gcm_128 aes128; + struct 
tls12_crypto_info_chacha20_poly1305 chacha20; + } tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; @@ -143,14 +146,16 @@ FIXTURE_SETUP(tls) len = sizeof(addr); memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = variant->cipher_type; switch (variant->cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: - tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305); + tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12.chacha20.info.version = variant->tls_version; + tls12.chacha20.info.cipher_type = variant->cipher_type; break; case TLS_CIPHER_AES_GCM_128: - tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128); + tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12.aes128.info.version = variant->tls_version; + tls12.aes128.info.cipher_type = variant->cipher_type; break; default: tls12_sz = 0; diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 490a8cca708a..fabb1d555ee5 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -26,6 +26,7 @@ #include <inttypes.h> #include <linux/errqueue.h> #include <linux/if_ether.h> +#include <linux/if_packet.h> #include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <netdb.h> @@ -34,7 +35,6 @@ #include <netinet/ip.h> #include <netinet/udp.h> #include <netinet/tcp.h> -#include <netpacket/packet.h> #include <poll.h> #include <stdarg.h> #include <stdbool.h> @@ -495,12 +495,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * 
also see ANK comment in net/ipv4/raw.c diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 087f0e6e71ce..f33154c04d34 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT -currentyear=$(date +%G) +currentyear=$(date +%Y) lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin <<EOF table inet filter { diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c index 6689f1183dbf..073a03702ff5 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c @@ -22,6 +22,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifdef __NR_syscalls @@ -55,8 +57,8 @@ unsigned long trapped_call_count = 0; unsigned long native_call_count = 0; char selector; -#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON) -#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF) +#define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW) #define CALIBRATION_STEP 100000 #define CALIBRATE_TO_SECS 5 @@ -170,7 +172,7 @@ int main(void) syscall(MAGIC_SYSCALL_1); #ifdef TEST_BLOCKED_RETURN - if (selector == PR_SYS_DISPATCH_OFF) { + if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) { fprintf(stderr, "Failed to return with selector blocked.\n"); exit(-1); } diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index 6498b050ef89..b5d592d4099e 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ 
b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -18,6 +18,8 @@ # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 # define PR_SYS_DISPATCH_ON 1 +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif #ifndef SYS_USER_DISPATCH @@ -30,8 +32,8 @@ # define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ #endif -#define SYSCALL_DISPATCH_ON(x) ((x) = 1) -#define SYSCALL_DISPATCH_OFF(x) ((x) = 0) +#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK) +#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW) /* Test Summary: * @@ -56,7 +58,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; struct sysinfo info; int ret; @@ -79,7 +81,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) TEST(bad_prctl_param) { - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; int op; /* Invalid op */ @@ -220,7 +222,7 @@ TEST_SIGNAL(bad_selector, SIGSYS) sigset_t mask; struct sysinfo info; - glob_sel = 0; + glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW; nr_syscalls_emulated = 0; si_code = 0; si_errno = 0; @@ -288,7 +290,7 @@ TEST(direct_dispatch_range) { int ret = 0; struct sysinfo info; - char sel = 0; + char sel = SYSCALL_DISPATCH_FILTER_ALLOW; /* * Instead of calculating libc addresses; allow the entire diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index e09d3c0e307f..bd64a4bf11ab 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -201,5 +201,51 @@ "teardown": [ "$TC qdisc del dev $DEV1 ingress" ] + }, + { + "id": "0692", + "name": "Test u32 sample option, divisor 256", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 
99 handle 1: u32 divisor 256" + ], + "cmdUnderTest": "bash -c \"for mask in ff ffff ffffff ffffffff ff00ff ff0000ff ffff00ff; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 40 flowid 1:1", + "matchCount": "7", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "2478", + "name": "Test u32 sample option, divisor 16", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 99 handle 1: u32 divisor 256" + ], + "cmdUnderTest": "bash -c \"for mask in 70 f0 ff0 fff0 ff00f0; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 4 flowid 1:1", + "matchCount": "5", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests.sh index e953f3cd9664..e953f3cd9664 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests.sh |