diff options
1243 files changed, 16565 insertions, 9997 deletions
@@ -197,6 +197,7 @@ Daniel Borkmann <daniel@iogearbox.net> <daniel.borkmann@tik.ee.ethz.ch> Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com> Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com> Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com> +Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com> David Brownell <david-b@pacbell.net> David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org> David Heidelberg <david@ixit.cz> <d.okias@gmail.com> @@ -222,6 +223,8 @@ Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com> Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com> Domen Puncer <domen@coderock.org> Douglas Gilbert <dougg@torque.net> +Drew Fustini <fustini@kernel.org> <drew@pdp7.com> +<duje@dujemihanovic.xyz> <duje.mihanovic@skole.hr> Ed L. Cashin <ecashin@coraid.com> Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org> Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com> @@ -282,6 +285,7 @@ Gustavo Padovan <gustavo@las.ic.unicamp.br> Gustavo Padovan <padovan@profusion.mobi> Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> <hamza.mahfooz@amd.com> Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org> +Hans de Goede <hansg@kernel.org> <hdegoede@redhat.com> Hans Verkuil <hverkuil@xs4all.nl> <hansverk@cisco.com> Hans Verkuil <hverkuil@xs4all.nl> <hverkuil-cisco@xs4all.nl> Harry Yoo <harry.yoo@oracle.com> <42.hyeyoo@gmail.com> @@ -412,6 +416,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com> Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org> Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org> Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com> +Kirill A. 
Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com> Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org> Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org> @@ -691,9 +696,10 @@ Serge Hallyn <sergeh@kernel.org> <serge.hallyn@canonical.com> Serge Hallyn <sergeh@kernel.org> <serue@us.ibm.com> Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com> Shakeel Butt <shakeel.butt@linux.dev> <shakeelb@google.com> -Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io> -Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com> -Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com> +Shannon Nelson <sln@onemain.com> <shannon.nelson@amd.com> +Shannon Nelson <sln@onemain.com> <snelson@pensando.io> +Shannon Nelson <sln@onemain.com> <shannon.nelson@intel.com> +Shannon Nelson <sln@onemain.com> <shannon.nelson@oracle.com> Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org> Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com> Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com> @@ -827,3 +833,6 @@ Yosry Ahmed <yosry.ahmed@linux.dev> <yosryahmed@google.com> Yusuke Goda <goda.yusuke@renesas.com> Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com> Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <quic_zijuhu@quicinc.com> +Zijun Hu <zijun.hu@oss.qualcomm.com> <zijuhu@codeaurora.org> +Zijun Hu <zijun_hu@htc.com> @@ -2981,6 +2981,11 @@ S: 521 Pleasant Valley Road S: Potsdam, New York 13676 S: USA +N: Shannon Nelson +E: sln@onemain.com +D: Worked on several network drivers including +D: ixgbe, i40e, ionic, pds_core, pds_vdpa, pds_fwctl + N: Dave Neuer E: dave.neuer@pobox.com D: Helped implement support for Compaq's H31xx series iPAQs diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 
54195530e97a..d3da88b26a53 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -56,7 +56,7 @@ Date: January 2009 Contact: Rafael J. Wysocki <rjw@rjwysocki.net> Description: The /sys/devices/.../async attribute allows the user space to - enable or diasble the device's suspend and resume callbacks to + enable or disable the device's suspend and resume callbacks to be executed asynchronously (ie. in separate threads, in parallel with the main suspend/resume thread) during system-wide power transitions (eg. suspend to RAM, hibernation). diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bf85f4de6862..ab8cd337f43a 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -584,6 +584,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 /sys/devices/system/cpu/vulnerabilities/srbds + /sys/devices/system/cpu/vulnerabilities/tsa /sys/devices/system/cpu/vulnerabilities/tsx_async_abort Date: January 2018 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index d4140dc6c5ba..615453fcc9ff 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -711,7 +711,7 @@ Description: This file shows the thin provisioning type. This is one of The file is read only. -What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resourse_count +What: /sys/class/scsi_device/*/device/unit_descriptor/physical_memory_resource_count Date: February 2018 Contact: Stanislav Nijnikov <stanislav.nijnikov@wdc.com> Description: This file shows the total physical memory resources. 
This is diff --git a/Documentation/ABI/testing/sysfs-edac-scrub b/Documentation/ABI/testing/sysfs-edac-scrub index c43be90deab4..ab6014743da5 100644 --- a/Documentation/ABI/testing/sysfs-edac-scrub +++ b/Documentation/ABI/testing/sysfs-edac-scrub @@ -49,6 +49,12 @@ Description: (RO) Supported minimum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the minimum scrub cycle + supported by the memory device. + + Region-based scrub: returns the max of minimum scrub cycles + supported by individual memory devices that back the region. + What: /sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration Date: March 2025 KernelVersion: 6.15 @@ -57,6 +63,16 @@ Description: (RO) Supported maximum scrub cycle duration in seconds by the memory scrubber. + Device-based scrub: returns the maximum scrub cycle supported + by the memory device. + + Region-based scrub: returns the min of maximum scrub cycles + supported by individual memory devices that back the region. + + If the memory device does not provide maximum scrub cycle + information, return the maximum supported value of the scrub + cycle field. + What: /sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration Date: March 2025 KernelVersion: 6.15 diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0cc35a14afbe..bd98ea3175ec 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1732,12 +1732,6 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. - numa_task_migrated (npn) - Number of task migration by NUMA balancing. - - numa_task_swapped (npn) - Number of task swap by NUMA balancing. - pgdemote_kswapd Number of pages demoted by kswapd. 
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst index 1302fd1b55e8..6dba18dbb9ab 100644 --- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -157,9 +157,7 @@ This is achieved by using the otherwise unused and obsolete VERW instruction in combination with a microcode update. The microcode clears the affected CPU buffers when the VERW instruction is executed. -Kernel reuses the MDS function to invoke the buffer clearing: - - mds_clear_cpu_buffers() +Kernel does the buffer clearing with x86_clear_cpu_buffers(). On MDS affected CPUs, the kernel already invokes CPU buffer clear on kernel/userspace, hypervisor/guest and C-state (idle) transitions. No diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1f2c0874da9..07e22ba5bfe3 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7488,6 +7488,19 @@ having this key zero'ed is acceptable. E.g. in testing scenarios. + tsa= [X86] Control mitigation for Transient Scheduler + Attacks on AMD CPUs. Search the following in your + favourite search engine for more details: + + "Technical guidance for mitigating transient scheduler + attacks". + + off - disable the mitigation + on - enable the mitigation (default) + user - mitigate only user/kernel transitions + vm - mitigate only guest/host transitions + + tsc= Disable clocksource stability checks for TSC. 
Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index dee7b6de864f..ee9b790c0d72 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -234,7 +234,7 @@ Before jumping into the kernel, the following conditions must be met: - If the kernel is entered at EL1: - - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1 + - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1 - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1. - The DT or ACPI tables must describe a GICv3 interrupt controller. diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index 5a2e6c0ef04a..3518671e1a85 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -93,7 +93,7 @@ enters a C-state. The kernel provides a function to invoke the buffer clearing: - mds_clear_cpu_buffers() + x86_clear_cpu_buffers() Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. Other than CFLAGS.ZF, this macro doesn't clobber any registers. @@ -185,9 +185,9 @@ Mitigation points idle clearing would be a window dressing exercise and is therefore not activated. - The invocation is controlled by the static key mds_idle_clear which is - switched depending on the chosen mitigation mode and the SMT state of - the system. + The invocation is controlled by the static key cpu_buf_idle_clear which is + switched depending on the chosen mitigation mode and the SMT state of the + system. 
The buffer clear is only invoked before entering the C-State to prevent that stale data from the idling CPU from spilling to the Hyper-Thread diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst index d2343952f2cb..8606bf958a8c 100644 --- a/Documentation/bpf/map_hash.rst +++ b/Documentation/bpf/map_hash.rst @@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on other CPUs involved in the following operation attempts: - Attempt to use CPU-local state to batch operations -- Attempt to fetch free nodes from global lists +- Attempt to fetch ``target_free`` free nodes from global lists - Attempt to pull any node from a global list and remove it from the hashmap - Attempt to pull any node from any CPU's list and remove it from the hashmap +The number of nodes to borrow from the global list in a batch, ``target_free``, +depends on the size of the map. Larger batch size reduces lock contention, but +may also exhaust the global structure. The value is computed at map init to +avoid exhaustion, by limiting aggregate reservation by all CPUs to half the map +size, with a minimum of a single element and a maximum budget of 128 at a time. + This algorithm is described visually in the following diagram. 
See the description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of the corresponding operations: diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot index a0fee349d29c..ab10058f5b79 100644 --- a/Documentation/bpf/map_lru_hash_update.dot +++ b/Documentation/bpf/map_lru_hash_update.dot @@ -35,18 +35,18 @@ digraph { fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2, label="Flush local pending, Rotate Global list, move - LOCAL_FREE_TARGET + target_free from global -> local"] // Also corresponds to: // fn__local_list_flush() // fn_bpf_lru_list_rotate() fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2, - label="Able to free\nLOCAL_FREE_TARGET\nnodes?"] + label="Able to free\ntarget_free\nnodes?"] fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3, label="Shrink inactive list up to remaining - LOCAL_FREE_TARGET + target_free (global LRU -> local)"] fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2, label="> 0 entries in\nlocal free list?"] diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml index 2985c8c717d7..5403242545ab 100644 --- a/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml +++ b/Documentation/devicetree/bindings/clock/mediatek,mt8188-clock.yaml @@ -52,6 +52,9 @@ properties: '#clock-cells': const: 1 + '#reset-cells': + const: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml index 9b5f3f3eab19..e69b6343a8eb 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi83.yaml @@ -118,15 +118,11 @@ $defs: ti,lvds-vod-swing-clock-microvolt: description: LVDS diferential output voltage <min max> for clock lanes in microvolts. 
- $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 ti,lvds-vod-swing-data-microvolt: description: LVDS diferential output voltage <min max> for data lanes in microvolts. - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 2 maxItems: 2 allOf: diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml index b57ae6963e62..6b6f6762d122 100644 --- a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml @@ -97,7 +97,10 @@ properties: resets: items: - - description: module reset + - description: + Module reset. This property is optional for controllers in Tegra194, + Tegra234 etc where an internal software reset is available as an + alternative. reset-names: items: @@ -116,6 +119,13 @@ properties: - const: rx - const: tx +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + allOf: - $ref: /schemas/i2c/i2c-controller.yaml - if: @@ -169,6 +179,18 @@ allOf: properties: power-domains: false + - if: + not: + properties: + compatible: + contains: + enum: + - nvidia,tegra194-i2c + then: + required: + - resets + - reset-names + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml index eddfd329c67b..69ac5db8b914 100644 --- a/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/realtek,rtl9301-i2c.yaml @@ -26,7 +26,8 @@ properties: - const: realtek,rtl9301-i2c reg: - description: Register offset and size this I2C controller. + items: + - description: Register offset and size this I2C controller. 
"#address-cells": const: 1 diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml index cb3e1801b0d3..0840e4ab28b7 100644 --- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml +++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml @@ -4,14 +4,14 @@ $id: http://devicetree.org/schemas/input/elan,ekth6915.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Elan eKTH6915 touchscreen controller +title: Elan I2C-HID touchscreen controllers maintainers: - Douglas Anderson <dianders@chromium.org> description: - Supports the Elan eKTH6915 touchscreen controller. - This touchscreen controller uses the i2c-hid protocol with a reset GPIO. + Supports the Elan eKTH6915 and other I2C-HID touchscreen controllers. + These touchscreen controllers use the i2c-hid protocol with a reset GPIO. allOf: - $ref: /schemas/input/touchscreen/touchscreen.yaml# @@ -23,12 +23,14 @@ properties: - enum: - elan,ekth5015m - const: elan,ekth6915 + - items: + - const: elan,ekth8d18 + - const: elan,ekth6a12nay - enum: - elan,ekth6915 - elan,ekth6a12nay - reg: - const: 0x10 + reg: true interrupts: maxItems: 1 diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7b6a2fde8175..19934d5c24e5 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -23,7 +23,7 @@ properties: - allwinner,sun20i-d1-emac - allwinner,sun50i-h6-emac - allwinner,sun50i-h616-emac0 - - allwinner,sun55i-a523-emac0 + - allwinner,sun55i-a523-gmac0 - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml index 4dd2dc9c678b..8afbd9ebd73f 100644 --- 
a/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/sophgo,sg2044-dwmac.yaml @@ -80,6 +80,8 @@ examples: interrupt-parent = <&intc>; interrupts = <296 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "macirq"; + phy-handle = <&phy0>; + phy-mode = "rgmii-id"; resets = <&rst 30>; reset-names = "stmmaceth"; snps,multicast-filter-bins = <0>; @@ -91,7 +93,6 @@ examples: snps,mtl-rx-config = <&gmac0_mtl_rx_setup>; snps,mtl-tx-config = <&gmac0_mtl_tx_setup>; snps,axi-config = <&gmac0_stmmac_axi_setup>; - status = "disabled"; gmac0_mtl_rx_setup: rx-queues-config { snps,rx-queues-to-use = <8>; diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt deleted file mode 100644 index cd79975e85ec..000000000000 --- a/Documentation/devicetree/bindings/pmem/pmem-region.txt +++ /dev/null @@ -1,65 +0,0 @@ -Device-tree bindings for persistent memory regions ------------------------------------------------------ - -Persistent memory refers to a class of memory devices that are: - - a) Usable as main system memory (i.e. cacheable), and - b) Retain their contents across power failure. - -Given b) it is best to think of persistent memory as a kind of memory mapped -storage device. To ensure data integrity the operating system needs to manage -persistent regions separately to the normal memory pool. To aid with that this -binding provides a standardised interface for discovering where persistent -memory regions exist inside the physical address space. - -Bindings for the region nodes: ------------------------------ - -Required properties: - - compatible = "pmem-region" - - - reg = <base, size>; - The reg property should specify an address range that is - translatable to a system physical address range. This address - range should be mappable as normal system memory would be - (i.e cacheable). 
- - If the reg property contains multiple address ranges - each address range will be treated as though it was specified - in a separate device node. Having multiple address ranges in a - node implies no special relationship between the two ranges. - -Optional properties: - - Any relevant NUMA associativity properties for the target platform. - - - volatile; This property indicates that this region is actually - backed by non-persistent memory. This lets the OS know that it - may skip the cache flushes required to ensure data is made - persistent after a write. - - If this property is absent then the OS must assume that the region - is backed by non-volatile memory. - -Examples: --------------------- - - /* - * This node specifies one 4KB region spanning from - * 0x5000 to 0x5fff that is backed by non-volatile memory. - */ - pmem@5000 { - compatible = "pmem-region"; - reg = <0x00005000 0x00001000>; - }; - - /* - * This node specifies two 4KB regions that are backed by - * volatile (normal) memory. - */ - pmem@6000 { - compatible = "pmem-region"; - reg = < 0x00006000 0x00001000 - 0x00008000 0x00001000 >; - volatile; - }; - diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.yaml b/Documentation/devicetree/bindings/pmem/pmem-region.yaml new file mode 100644 index 000000000000..bd0f0c793f03 --- /dev/null +++ b/Documentation/devicetree/bindings/pmem/pmem-region.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pmem-region.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +maintainers: + - Oliver O'Halloran <oohall@gmail.com> + +title: Persistent Memory Regions + +description: | + Persistent memory refers to a class of memory devices that are: + + a) Usable as main system memory (i.e. cacheable), and + b) Retain their contents across power failure. + + Given b) it is best to think of persistent memory as a kind of memory mapped + storage device. 
To ensure data integrity the operating system needs to manage + persistent regions separately to the normal memory pool. To aid with that this + binding provides a standardised interface for discovering where persistent + memory regions exist inside the physical address space. + +properties: + compatible: + const: pmem-region + + reg: + maxItems: 1 + + volatile: + description: + Indicates the region is volatile (non-persistent) and the OS can skip + cache flushes for writes + type: boolean + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pmem@5000 { + compatible = "pmem-region"; + reg = <0x00005000 0x00001000>; + }; diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml index 33d2016b6509..c6bc27709bf7 100644 --- a/Documentation/devicetree/bindings/serial/8250.yaml +++ b/Documentation/devicetree/bindings/serial/8250.yaml @@ -45,7 +45,7 @@ allOf: - ns16550 - ns16550a then: - anyOf: + oneOf: - required: [ clock-frequency ] - required: [ clocks ] diff --git a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt deleted file mode 100644 index 55a901051e8f..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt +++ /dev/null @@ -1,5 +0,0 @@ -Altera JTAG UART - -Required properties: -- compatible : should be "ALTR,juart-1.0" <DEPRECATED> -- compatible : should be "altr,juart-1.0" diff --git a/Documentation/devicetree/bindings/serial/altera_uart.txt b/Documentation/devicetree/bindings/serial/altera_uart.txt deleted file mode 100644 index 81bf7ffb1a81..000000000000 --- a/Documentation/devicetree/bindings/serial/altera_uart.txt +++ /dev/null @@ -1,8 +0,0 @@ -Altera UART - -Required properties: -- compatible : should be "ALTR,uart-1.0" <DEPRECATED> -- compatible : should be "altr,uart-1.0" - -Optional properties: -- clock-frequency : frequency of the clock input to the UART diff 
--git a/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml new file mode 100644 index 000000000000..02e20fa591da --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,juart-1.0.yaml @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,juart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera JTAG UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +properties: + compatible: + const: altr,juart-1.0 + +required: + - compatible + +additionalProperties: false diff --git a/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml new file mode 100644 index 000000000000..72d4972e1e22 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/altr,uart-1.0.yaml @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/altr,uart-1.0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Altera UART + +maintainers: + - Dinh Nguyen <dinguyen@kernel.org> + +allOf: + - $ref: /schemas/serial/serial.yaml# + +properties: + compatible: + const: altr,uart-1.0 + + clock-frequency: + description: Frequency of the clock input to the UART. 
+ +required: + - compatible + +unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml index 234089b5954d..b43df10c5ef4 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas//soc/fsl/fsl,ls1028a-reset.yaml# +$id: http://devicetree.org/schemas/soc/fsl/fsl,ls1028a-reset.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Layerscape Reset Registers Module diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 3616d7161dab..a5734bdd1cc7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1249,3 +1249,12 @@ Using try_lookup_noperm() will require linux/namei.h to be included. Calling conventions for ->d_automount() have changed; we should *not* grab an extra reference to new mount - it should be returned with refcount 1. + +--- + +collect_mounts()/drop_collected_mounts()/iterate_mounts() are gone now. +Replacement is collect_paths()/drop_collected_path(), with no special +iterator needed. Instead of a cloned mount tree, the new interface returns +an array of struct path, one for each mount collect_mounts() would've +created. These struct path point to locations in the caller's namespace +that would be roots of the cloned mounts. diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst index b8c801e0068c..cab2e81013bc 100644 --- a/Documentation/gpu/nouveau.rst +++ b/Documentation/gpu/nouveau.rst @@ -25,7 +25,7 @@ providing a consistent API to upper layers of the driver stack. GSP Support ------------------------ -.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +.. 
kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c :doc: GSP message queue element .. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 4cbfe666e6f5..b29d62eefa16 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -76,7 +79,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -103,7 +106,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -132,7 +135,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: description: The netlink attribute type enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ] @@ -169,7 +172,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -206,7 +209,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -348,7 +351,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. 
type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index 40efbbad76ab..7b1ec153e834 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -6,6 +6,9 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ uint: type: integer minimum: 0 @@ -29,7 +32,7 @@ additionalProperties: False properties: name: description: Name of the genetlink family. - type: string + $ref: '#/$defs/name' doc: type: string protocol: @@ -48,7 +51,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -75,7 +78,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -96,7 +99,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -121,7 +124,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type enum: [ unused, pad, flag, binary, uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, @@ -243,7 +246,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. type: string @@ -327,7 +330,7 @@ properties: name: description: | The name for the group, used to form the define and the value of the define. 
- type: string + $ref: '#/$defs/name' flags: *cmd_flags kernel-family: diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index e34bf23897fa..246fa07bccf6 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -6,6 +6,12 @@ $schema: https://json-schema.org/draft-07/schema # Common defines $defs: + name: + type: string + pattern: ^[0-9a-z-]+$ + name-cap: + type: string + pattern: ^[0-9a-zA-Z-]+$ uint: type: integer minimum: 0 @@ -71,7 +77,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' header: description: For C-compatible languages, header which already defines this value. type: string @@ -98,7 +104,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' value: type: integer doc: @@ -124,7 +130,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name-cap' type: description: | The netlink attribute type. Members of type 'binary' or 'pad' @@ -183,7 +189,7 @@ properties: name: description: | Name used when referring to this space in other definitions, not used outside of the spec. - type: string + $ref: '#/$defs/name' name-prefix: description: | Prefix for the C enum name of the attributes. Default family[name]-set[name]-a- @@ -220,7 +226,7 @@ properties: additionalProperties: False properties: name: - type: string + $ref: '#/$defs/name' type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, @@ -408,7 +414,7 @@ properties: properties: name: description: Name of the operation, also defining its C enum value in uAPI. - type: string + $ref: '#/$defs/name' doc: description: Documentation for the command. 
type: string diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 05fee1b7fe19..38ddc04f9e6d 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -38,15 +38,15 @@ definitions: - name: dsa - - name: pci_pf + name: pci-pf - - name: pci_vf + name: pci-vf - name: virtual - name: unused - - name: pci_sf + name: pci-sf - type: enum name: port-fn-state @@ -220,7 +220,7 @@ definitions: - name: flag - - name: nul_string + name: nul-string value: 10 - name: binary diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index 8feefeae5376..f434140b538e 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -188,7 +188,7 @@ definitions: value: 10000 - type: const - name: pin-frequency-77_5-khz + name: pin-frequency-77-5-khz value: 77500 - type: const diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 9f98715a6512..348c6ad548f5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -7,6 +7,9 @@ protocol: genetlink-legacy doc: Partial family for Ethtool Netlink. uapi-header: linux/ethtool_netlink_generated.h +c-family-name: ethtool-genl-name +c-version-name: ethtool-genl-version + definitions: - name: udp-tunnel-type @@ -45,7 +48,7 @@ definitions: name: started doc: The firmware flashing process has started. - - name: in_progress + name: in-progress doc: The firmware flashing process is in progress. 
- name: completed @@ -1419,7 +1422,7 @@ attribute-sets: name: hkey type: binary - - name: input_xfrm + name: input-xfrm type: u32 - name: start-context @@ -2235,7 +2238,7 @@ operations: - hfunc - indir - hkey - - input_xfrm + - input-xfrm dump: request: attributes: diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml index 0af5ab842c04..b02ab19817d3 100644 --- a/Documentation/netlink/specs/fou.yaml +++ b/Documentation/netlink/specs/fou.yaml @@ -15,7 +15,7 @@ kernel-policy: global definitions: - type: enum - name: encap_type + name: encap-type name-prefix: fou-encap- enum-name: entries: [ unspec, direct, gue ] @@ -43,26 +43,26 @@ attribute-sets: name: type type: u8 - - name: remcsum_nopartial + name: remcsum-nopartial type: flag - - name: local_v4 + name: local-v4 type: u32 - - name: local_v6 + name: local-v6 type: binary checks: min-len: 16 - - name: peer_v4 + name: peer-v4 type: u32 - - name: peer_v6 + name: peer-v6 type: binary checks: min-len: 16 - - name: peer_port + name: peer-port type: u16 byte-order: big-endian - @@ -90,12 +90,12 @@ operations: - port - ipproto - type - - remcsum_nopartial - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 - - peer_port + - remcsum-nopartial + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 + - peer-port - ifindex - @@ -112,11 +112,11 @@ operations: - af - ifindex - port - - peer_port - - local_v4 - - peer_v4 - - local_v6 - - peer_v6 + - peer-port + - local-v4 + - peer-v4 + - local-v6 + - peer-v6 - name: get diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml index dfd017780d2f..fb57860fe778 100644 --- a/Documentation/netlink/specs/mptcp_pm.yaml +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -57,21 +57,21 @@ definitions: doc: >- A new subflow has been established. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. 
+ daddr6, sport, dport, backup, if-idx [, error]. - name: sub-closed doc: >- A subflow has been closed. An error (copy of sk_err) could be set if an error has been detected for this subflow. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: sub-priority value: 13 doc: >- The priority of a subflow has changed. 'error' should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - daddr6, sport, dport, backup, if_idx [, error]. + daddr6, sport, dport, backup, if-idx [, error]. - name: listener-created value: 15 @@ -255,7 +255,7 @@ attribute-sets: name: timeout type: u32 - - name: if_idx + name: if-idx type: u32 - name: reset-reason diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index c87658114852..8d1a3c01708f 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -27,7 +27,7 @@ attribute-sets: name: proc type: u32 - - name: service_time + name: service-time type: s64 - name: pad @@ -139,7 +139,7 @@ operations: - prog - version - proc - - service_time + - service-time - saddr4 - daddr4 - saddr6 diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 46f5d1cd8a5f..7974aa7d8905 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -216,7 +216,7 @@ definitions: type: struct members: - - name: nd_target + name: nd-target type: binary len: 16 byte-order: big-endian @@ -258,12 +258,12 @@ definitions: type: struct members: - - name: vlan_tpid + name: vlan-tpid type: u16 byte-order: big-endian doc: Tag protocol identifier (TPID) to push. - - name: vlan_tci + name: vlan-tci type: u16 byte-order: big-endian doc: Tag control identifier (TCI) to push. 
diff --git a/Documentation/netlink/specs/rt-link.yaml b/Documentation/netlink/specs/rt-link.yaml index b41b31eebcae..28c4cf66517c 100644 --- a/Documentation/netlink/specs/rt-link.yaml +++ b/Documentation/netlink/specs/rt-link.yaml @@ -603,7 +603,7 @@ definitions: name: optmask type: u32 - - name: if_stats_msg + name: if-stats-msg type: struct members: - @@ -2486,7 +2486,7 @@ operations: name: getstats doc: Get / dump link stats. attribute-set: stats-attrs - fixed-header: if_stats_msg + fixed-header: if-stats-msg do: request: value: 94 diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index cb7ea7d62e56..42d74c9aeb54 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -232,7 +232,7 @@ definitions: type: u8 doc: log(P_max / (qth-max - qth-min)) - - name: Scell_log + name: Scell-log type: u8 doc: cell size for idle damping - @@ -253,7 +253,7 @@ definitions: name: DPs type: u32 - - name: def_DP + name: def-DP type: u32 - name: grio diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index af7db0e91f6b..a52850602cd8 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -66,7 +66,7 @@ Admin Function driver As mentioned above RVU PF0 is called the admin function (AF), this driver supports resource provisioning and configuration of functional blocks. Doesn't handle any I/O. It sets up few basic stuff but most of the -funcionality is achieved via configuration requests from PFs and VFs. +functionality is achieved via configuration requests from PFs and VFs. PF/VFs communicates with AF via a shared memory region (mailbox). Upon receiving requests AF does resource provisioning and other HW configuration. 
diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index c7904a1bc167..36cc7afc2527 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -16,11 +16,13 @@ User interface Creating a TLS connection ------------------------- -First create a new TCP socket and set the TLS ULP. +First create a new TCP socket and once the connection is established set the +TLS ULP. .. code-block:: c sock = socket(AF_INET, SOCK_STREAM, 0); + connect(sock, addr, addrlen); setsockopt(sock, SOL_TCP, TCP_ULP, "tls", sizeof("tls")); Setting the TLS ULP allows us to set/get TLS socket options. Currently diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index da6bf0f6d01e..34e00848e0da 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -290,6 +290,7 @@ an involved disclosed party. The current ambassadors list: AMD Tom Lendacky <thomas.lendacky@amd.com> Ampere Darren Hart <darren@os.amperecomputing.com> ARM Catalin Marinas <catalin.marinas@arm.com> + IBM Power Madhavan Srinivasan <maddy@linux.ibm.com> IBM Z Christian Borntraeger <borntraeger@de.ibm.com> Intel Tony Luck <tony.luck@intel.com> Qualcomm Trilok Soni <quic_tsoni@quicinc.com> diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 1ac62dc3a66f..e1755610b4bc 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -312,7 +312,7 @@ Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). Co-posting selftests --------------------- +~~~~~~~~~~~~~~~~~~~~ Selftests should be part of the same series as the code changes. 
Specifically for fixes both code change and related test should go into diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst index 98c6f6c74394..57d1964453e1 100644 --- a/Documentation/sound/codecs/cs35l56.rst +++ b/Documentation/sound/codecs/cs35l56.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0-only -===================================================================== -Audio drivers for Cirrus Logic CS35L54/56/57 Boosted Smart Amplifiers -===================================================================== +======================================================================== +Audio drivers for Cirrus Logic CS35L54/56/57/63 Boosted Smart Amplifiers +======================================================================== :Copyright: 2025 Cirrus Logic, Inc. and Cirrus Logic International Semiconductor Ltd. @@ -13,11 +13,11 @@ Summary The high-level summary of this document is: -**If you have a laptop that uses CS35L54/56/57 amplifiers but audio is not +**If you have a laptop that uses CS35L54/56/57/63 amplifiers but audio is not working, DO NOT ATTEMPT TO USE FIRMWARE AND SETTINGS FROM ANOTHER LAPTOP, EVEN IF THAT LAPTOP SEEMS SIMILAR.** -The CS35L54/56/57 amplifiers must be correctly configured for the power +The CS35L54/56/57/63 amplifiers must be correctly configured for the power supply voltage, speaker impedance, maximum speaker voltage/current, and other external hardware connections. @@ -34,6 +34,7 @@ The cs35l56 drivers support: * CS35L54 * CS35L56 * CS35L57 +* CS35L63 There are two drivers in the kernel @@ -104,6 +105,13 @@ In this example the SSID is 10280c63. The format of the firmware file names is: +SoundWire (except CS35L56 Rev B0): + cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u? 
+ +SoundWire CS35L56 Rev B0: + cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN + +Non-SoundWire (HDA and I2S): cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN Where: @@ -111,12 +119,18 @@ Where: * cs35lxx-b0 is the amplifier model and silicon revision. This information is logged by the driver during initialization. * SSID is the 8-digit hexadecimal SSID value. + * l?u? is the physical address on the SoundWire bus of the amp this + file applies to. * ampN is the amplifier number (for example amp1). This is the same as the prefix on the ALSA control names except that it is always lower-case in the file name. * spkidX is an optional part, used for laptops that have firmware configurations for different makes and models of internal speakers. +The CS35L56 Rev B0 continues to use the old filename scheme because a +large number of firmware files have already been published with these +names. + Sound Open Firmware and ALSA topology files ------------------------------------------- diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1bd2d42e6424..43ed57e048a8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6645,7 +6645,8 @@ to the byte array. .. note:: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, - KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding + KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX, + KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. 
@@ -7176,6 +7177,69 @@ The valid value for 'flags' is: :: + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + u64 ret; + u64 data[5]; + } unknown; + struct { + u64 ret; + u64 gpa; + u64 size; + } get_quote; + struct { + u64 ret; + u64 leaf; + u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + u64 ret; + u64 vector; + } setup_event_notify; + }; + } tdx; + +Process a TDVMCALL from the guest. KVM forwards select TDVMCALLs based +on the Guest-Hypervisor Communication Interface (GHCI) specification; +KVM bridges these requests to the userspace VMM with minimal changes, +placing the inputs in the union and copying them back to the guest +on re-entry. + +Flags are currently always zero, whereas ``nr`` contains the TDVMCALL +number from register R11. The remaining fields of the union provide the +inputs and outputs of the TDVMCALL. Currently the following values of +``nr`` are defined: + + * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote + signed by a service hosting TD-Quoting Enclave operating on the host. + Parameters and return value are in the ``get_quote`` field of the union. + The ``gpa`` field and ``size`` specify the guest physical address + (without the shared bit set) and the size of a shared-memory buffer, in + which the TDX guest passes a TD Report. The ``ret`` field represents + the return value of the GetQuote request. When the request has been + queued successfully, the TDX guest can poll the status field in the + shared-memory area to check whether the Quote generation is completed or + not. When completed, the generated Quote is returned via the same buffer. + + * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support + status of TDVMCALLs. The output values for the given leaf should be + placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` + field of the union. 
+ +* ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to +set up a notification interrupt for vector ``vector``. + +KVM may add support for more values in the future that may cause a userspace +exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, +it will enter with output fields already valid; in the common case, the +``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``. +Userspace need not do anything if it does not wish to support a TDVMCALL. +:: + /* Fix the size of the union. */ char padding[256]; }; diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst index 76bdd95334d6..5efac62c92c7 100644 --- a/Documentation/virt/kvm/x86/intel-tdx.rst +++ b/Documentation/virt/kvm/x86/intel-tdx.rst @@ -79,7 +79,20 @@ to be configured to the TDX guest. struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to + * userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + + /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel + * and forwarded to userspace, respectively + */ + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/Documentation/wmi/acpi-interface.rst b/Documentation/wmi/acpi-interface.rst index f1b28835d23c..1ef003b033bf 100644 --- a/Documentation/wmi/acpi-interface.rst +++ b/Documentation/wmi/acpi-interface.rst @@ -36,7 +36,7 @@ Offset Size (in bytes) Content The WMI object flags control whether the method or notification ID is used: -- 0x1: Data block usage is expensive and must be explicitly enabled/disabled. +- 0x1: Data block is expensive to collect. - 0x2: Data block contains WMI methods. - 0x4: Data block contains ASCIZ string. 
- 0x8: Data block describes a WMI event, use notification ID instead @@ -83,14 +83,18 @@ event as hexadecimal value. Their first parameter is an integer with a value of 0 if the WMI event should be disabled, other values will enable the WMI event. +Those ACPI methods are always called even for WMI events not registered as +being expensive to collect to match the behavior of the Windows driver. + WCxx ACPI methods ----------------- -Similar to the ``WExx`` ACPI methods, except that it controls data collection -instead of events and thus the last two characters of the ACPI method name are -the method ID of the data block to enable/disable. +Similar to the ``WExx`` ACPI methods, except that instead of WMI events it controls +data collection of data blocks registered as being expensive to collect. Thus the +last two characters of the ACPI method name are the method ID of the data block +to enable/disable. Those ACPI methods are also called before setting data blocks to match the -behaviour of the Windows driver. +behavior of the Windows driver. 
_WED ACPI method ---------------- diff --git a/MAINTAINERS b/MAINTAINERS index 0c1d245bf7b8..60bba48f5479 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -207,7 +207,7 @@ X: arch/*/include/uapi/ X: include/uapi/ ABIT UGURU 1,2 HARDWARE MONITOR DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/abituguru.c @@ -371,7 +371,7 @@ S: Maintained F: drivers/platform/x86/quickstart.c ACPI SERIAL MULTI INSTANTIATE DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/serial-multi-instantiate.c @@ -1157,7 +1157,6 @@ F: arch/x86/include/asm/amd/node.h F: arch/x86/kernel/amd_node.c AMD PDS CORE DRIVER -M: Shannon Nelson <shannon.nelson@amd.com> M: Brett Creeley <brett.creeley@amd.com> L: netdev@vger.kernel.org S: Maintained @@ -3551,7 +3550,7 @@ F: arch/arm64/boot/Makefile F: scripts/make_fit.py ARM64 PLATFORM DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> R: Bryan O'Donoghue <bryan.odonoghue@linaro.org> L: platform-driver-x86@vger.kernel.org @@ -3712,7 +3711,7 @@ F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c ASUS TF103C DOCK DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git @@ -4182,6 +4181,7 @@ F: include/linux/cpumask_types.h F: include/linux/find.h F: include/linux/nodemask.h F: include/linux/nodemask_types.h +F: include/uapi/linux/bits.h F: include/vdso/bits.h F: lib/bitmap-str.c F: lib/bitmap.c @@ -4194,6 +4194,7 @@ F: tools/include/linux/bitfield.h F: tools/include/linux/bitmap.h F: tools/include/linux/bits.h F: tools/include/linux/find.h +F: tools/include/uapi/linux/bits.h F: tools/include/vdso/bits.h 
F: tools/lib/bitmap.c F: tools/lib/find_bit.c @@ -5614,14 +5615,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git F: drivers/usb/chipidea/ CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml F: drivers/input/touchscreen/chipone_icn8318.c CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/chipone_icn8505.c @@ -6919,7 +6920,7 @@ F: include/dt-bindings/pmu/exynos_ppmu.h F: include/linux/devfreq-event.h DEVICE RESOURCE MANAGEMENT HELPERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> R: Matti Vaittinen <mazziesaccount@gmail.com> S: Maintained F: include/linux/devm-helpers.h @@ -7518,7 +7519,7 @@ F: drivers/gpu/drm/gud/ F: include/drm/gud.h DRM DRIVER FOR GRAIN MEDIA GM12U320 PROJECTORS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/tiny/gm12u320.c @@ -7918,7 +7919,7 @@ F: drivers/gpu/drm/ci/xfails/vkms* F: drivers/gpu/drm/vkms/ DRM DRIVER FOR VIRTUALBOX VIRTUAL GPU -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -8319,7 +8320,7 @@ F: drivers/gpu/drm/panel/ F: include/drm/drm_panel.h DRM PRIVACY-SCREEN CLASS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -9942,7 +9943,6 @@ F: drivers/fwctl/mlx5/ FWCTL PDS DRIVER M: Brett Creeley <brett.creeley@amd.com> -R: Shannon Nelson <shannon.nelson@amd.com> L: 
linux-kernel@vger.kernel.org S: Maintained F: drivers/fwctl/pds/ @@ -10223,7 +10223,7 @@ S: Maintained F: Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml GOODIX TOUCHSCREEN -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/goodix* @@ -10262,7 +10262,7 @@ F: include/dt-bindings/clock/google,gs101.h K: [gG]oogle.?[tT]ensor GPD POCKET FAN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/gpd-pocket-fan.c @@ -10506,7 +10506,7 @@ S: Maintained F: block/partitions/efi.* HABANALABS PCI DRIVER -M: Ofir Bitton <obitton@habana.ai> +M: Yaron Avizrat <yaron.avizrat@intel.com> L: dri-devel@lists.freedesktop.org S: Supported C: irc://irc.oftc.net/dri-devel @@ -10841,7 +10841,7 @@ S: Maintained F: drivers/dma/hisi_dma.c HISILICON GPIO DRIVER -M: Jay Fang <f.fangjian@huawei.com> +M: Yang Shen <shenyang39@huawei.com> L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml @@ -11157,7 +11157,8 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song <muchun.song@linux.dev> -R: Oscar Salvador <osalvador@suse.de> +M: Oscar Salvador <osalvador@suse.de> +R: David Hildenbrand <david@redhat.com> L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -11168,6 +11169,7 @@ F: fs/hugetlbfs/ F: include/linux/hugetlb.h F: include/trace/events/hugetlbfs.h F: mm/hugetlb.c +F: mm/hugetlb_cgroup.c F: mm/hugetlb_cma.c F: mm/hugetlb_cma.h F: mm/hugetlb_vmemmap.c @@ -11423,7 +11425,7 @@ F: drivers/i2c/busses/i2c-via.c F: drivers/i2c/busses/i2c-viapro.c I2C/SMBUS INTEL CHT WHISKEY COVE PMIC DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-i2c@vger.kernel.org S: Maintained F: 
drivers/i2c/busses/i2c-cht-wc.c @@ -12013,13 +12015,13 @@ S: Supported F: sound/soc/intel/ INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/pm.c INTEL ATOMISP2 LED DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/led.c @@ -13347,6 +13349,7 @@ M: Alexander Graf <graf@amazon.com> M: Mike Rapoport <rppt@kernel.org> M: Changyuan Lyu <changyuanl@google.com> L: kexec@lists.infradead.org +L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/kho.rst F: Documentation/core-api/kho/* @@ -13680,7 +13683,7 @@ S: Maintained F: drivers/platform/x86/lenovo-wmi-hotkey-utilities.c LETSKETCH HID TABLET DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -13730,7 +13733,7 @@ F: drivers/ata/sata_gemini.c F: drivers/ata/sata_gemini.h LIBATA SATA AHCI PLATFORM devices support -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-ide@vger.kernel.org S: Maintained F: drivers/ata/ahci_platform.c @@ -13800,7 +13803,7 @@ M: Oliver O'Halloran <oohall@gmail.com> L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ -F: Documentation/devicetree/bindings/pmem/pmem-region.txt +F: Documentation/devicetree/bindings/pmem/pmem-region.yaml F: drivers/nvdimm/of_pmem.c LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM @@ -14100,7 +14103,7 @@ F: Documentation/admin-guide/ldm.rst F: block/partitions/ldm.* LOGITECH HID GAMING KEYBOARDS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained T: git 
git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -14782,7 +14785,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml F: drivers/power/supply/max17040_battery.c MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS -R: Hans de Goede <hdegoede@redhat.com> +R: Hans de Goede <hansg@kernel.org> R: Krzysztof Kozlowski <krzk@kernel.org> R: Marek Szyprowski <m.szyprowski@samsung.com> R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm> @@ -15549,6 +15552,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_* MELLANOX ETHERNET DRIVER (mlx5e) M: Saeed Mahameed <saeedm@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org S: Maintained W: https://www.nvidia.com/networking/ @@ -15584,7 +15588,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> M: Vadim Pasternak <vadimp@nvidia.com> L: platform-driver-x86@vger.kernel.org @@ -15618,6 +15622,7 @@ MELLANOX MLX5 core VPI driver M: Saeed Mahameed <saeedm@nvidia.com> M: Leon Romanovsky <leonro@nvidia.com> M: Tariq Toukan <tariqt@nvidia.com> +M: Mark Bloch <mbloch@nvidia.com> L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org S: Maintained @@ -15675,11 +15680,16 @@ MEMBLOCK AND MEMORY MANAGEMENT INITIALIZATION M: Mike Rapoport <rppt@kernel.org> L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git fixes F: Documentation/core-api/boot-time-mm.rst F: Documentation/core-api/kho/bindings/memblock/* F: include/linux/memblock.h +F: mm/bootmem_info.c F: mm/memblock.c +F: mm/memtest.c F: mm/mm_init.c +F: mm/rodata_test.c F: tools/testing/memblock/ MEMORY ALLOCATION PROFILING @@ -15734,7 +15744,6 @@ F: 
Documentation/admin-guide/mm/ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h -F: include/linux/memfd.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h @@ -15794,6 +15803,10 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: mm/gup.c +F: mm/gup_test.c +F: mm/gup_test.h +F: tools/testing/selftests/mm/gup_longterm.c +F: tools/testing/selftests/mm/gup_test.c MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) M: Andrew Morton <akpm@linux-foundation.org> @@ -15841,6 +15854,17 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - OOM KILLER +M: Michal Hocko <mhocko@suse.com> +R: David Rientjes <rientjes@google.com> +R: Shakeel Butt <shakeel.butt@linux.dev> +L: linux-mm@kvack.org +S: Maintained +F: include/linux/oom.h +F: include/trace/events/oom.h +F: include/uapi/linux/oom.h +F: mm/oom_kill.c + MEMORY MANAGEMENT - PAGE ALLOCATOR M: Andrew Morton <akpm@linux-foundation.org> M: Vlastimil Babka <vbabka@suse.cz> @@ -15855,8 +15879,17 @@ F: include/linux/compaction.h F: include/linux/gfp.h F: include/linux/page-isolation.h F: mm/compaction.c +F: mm/debug_page_alloc.c +F: mm/fail_page_alloc.c F: mm/page_alloc.c +F: mm/page_ext.c +F: mm/page_frag_cache.c F: mm/page_isolation.c +F: mm/page_owner.c +F: mm/page_poison.c +F: mm/page_reporting.c +F: mm/show_mem.c +F: mm/shuffle.c MEMORY MANAGEMENT - RECLAIM M: Andrew Morton <akpm@linux-foundation.org> @@ -15870,6 +15903,7 @@ L: linux-mm@kvack.org S: Maintained F: mm/pt_reclaim.c F: mm/vmscan.c +F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton <akpm@linux-foundation.org> @@ -15882,6 +15916,7 @@ R: Harry Yoo <harry.yoo@oracle.com> L: linux-mm@kvack.org S: Maintained F: include/linux/rmap.h +F: mm/page_vma_mapped.c F: mm/rmap.c MEMORY MANAGEMENT - SECRETMEM @@ -15914,9 +15949,9 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: 
Andrew Morton <akpm@linux-foundation.org> M: David Hildenbrand <david@redhat.com> +M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Zi Yan <ziy@nvidia.com> R: Baolin Wang <baolin.wang@linux.alibaba.com> -R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Nico Pache <npache@redhat.com> R: Ryan Roberts <ryan.roberts@arm.com> @@ -15974,11 +16009,14 @@ S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: include/trace/events/mmap.h +F: mm/mincore.c F: mm/mlock.c F: mm/mmap.c F: mm/mprotect.c F: mm/mremap.c F: mm/mseal.c +F: mm/msync.c +F: mm/nommu.c F: mm/vma.c F: mm/vma.h F: mm/vma_exec.c @@ -16541,7 +16579,7 @@ S: Maintained F: drivers/platform/surface/surface_gpe.c MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> M: Maximilian Luz <luzmaximilian@gmail.com> L: platform-driver-x86@vger.kernel.org @@ -16786,8 +16824,8 @@ F: include/dt-bindings/clock/mobileye,eyeq5-clk.h MODULE SUPPORT M: Luis Chamberlain <mcgrof@kernel.org> M: Petr Pavlu <petr.pavlu@suse.com> +M: Daniel Gomez <da.gomez@kernel.org> R: Sami Tolvanen <samitolvanen@google.com> -R: Daniel Gomez <da.gomez@samsung.com> L: linux-modules@vger.kernel.org L: linux-kernel@vger.kernel.org S: Maintained @@ -17186,10 +17224,10 @@ F: drivers/rtc/rtc-ntxec.c F: include/linux/mfd/ntxec.h NETRONOME ETHERNET DRIVERS -M: Louis Peens <louis.peens@corigine.com> R: Jakub Kicinski <kuba@kernel.org> +R: Simon Horman <horms@kernel.org> L: oss-drivers@corigine.com -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/netronome/ NETWORK BLOCK DEVICE (NBD) @@ -17709,7 +17747,7 @@ F: tools/include/nolibc/ F: tools/testing/selftests/nolibc/ NOVATEK NVT-TS I2C TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: 
Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml @@ -19379,7 +19417,7 @@ F: crypto/pcrypt.c F: include/crypto/pcrypt.h PDS DSC VIRTIO DATA PATH ACCELERATOR -R: Shannon Nelson <shannon.nelson@amd.com> +R: Brett Creeley <brett.creeley@amd.com> F: drivers/vdpa/pds/ PECI HARDWARE MONITORING DRIVERS @@ -19401,7 +19439,6 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson <shannon.nelson@amd.com> M: Brett Creeley <brett.creeley@amd.com> L: netdev@vger.kernel.org S: Maintained @@ -19566,8 +19603,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git F: drivers/pinctrl/intel/ PIN CONTROLLER - KEEMBAY -M: Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com> -S: Supported +S: Orphan F: drivers/pinctrl/pinctrl-keembay* PIN CONTROLLER - MEDIATEK @@ -20120,21 +20156,15 @@ S: Supported F: Documentation/devicetree/bindings/soc/qcom/qcom,apr* F: Documentation/devicetree/bindings/sound/qcom,* F: drivers/soc/qcom/apr.c -F: include/dt-bindings/sound/qcom,wcd9335.h -F: include/dt-bindings/sound/qcom,wcd934x.h -F: sound/soc/codecs/lpass-rx-macro.* -F: sound/soc/codecs/lpass-tx-macro.* -F: sound/soc/codecs/lpass-va-macro.c -F: sound/soc/codecs/lpass-wsa-macro.* +F: drivers/soundwire/qcom.c +F: include/dt-bindings/sound/qcom,wcd93* +F: sound/soc/codecs/lpass-*.* F: sound/soc/codecs/msm8916-wcd-analog.c F: sound/soc/codecs/msm8916-wcd-digital.c F: sound/soc/codecs/wcd-clsh-v2.* F: sound/soc/codecs/wcd-mbhc-v2.* -F: sound/soc/codecs/wcd9335.* -F: sound/soc/codecs/wcd934x.c -F: sound/soc/codecs/wsa881x.c -F: sound/soc/codecs/wsa883x.c -F: sound/soc/codecs/wsa884x.c +F: sound/soc/codecs/wcd93*.* +F: sound/soc/codecs/wsa88*.* F: sound/soc/qcom/ QCOM EMBEDDED USB DEBUGGER (EUD) @@ -21165,7 +21195,7 @@ M: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/renesas,r9a09g057-gbeth.yaml 
+F: Documentation/devicetree/bindings/net/renesas,rzv2h-gbeth.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c RENESAS RZ/V2H(P) USB2PHY PORT RESET DRIVER @@ -21377,7 +21407,7 @@ N: spacemit K: spacemit RISC-V THEAD SoC SUPPORT -M: Drew Fustini <drew@pdp7.com> +M: Drew Fustini <fustini@kernel.org> M: Guo Ren <guoren@kernel.org> M: Fu Wei <wefu@redhat.com> L: linux-riscv@lists.infradead.org @@ -22174,7 +22204,7 @@ R: Tejun Heo <tj@kernel.org> R: David Vernet <void@manifault.com> R: Andrea Righi <arighi@nvidia.com> R: Changwoo Min <changwoo@igalia.com> -L: linux-kernel@vger.kernel.org +L: sched-ext@lists.linux.dev S: Maintained W: https://github.com/sched-ext/scx T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git @@ -22553,9 +22583,11 @@ S: Maintained F: drivers/misc/sgi-xp/ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS +M: D. Wythe <alibuda@linux.alibaba.com> +M: Dust Li <dust.li@linux.alibaba.com> +M: Sidraya Jayagond <sidraya@linux.ibm.com> M: Wenjia Zhang <wenjia@linux.ibm.com> -M: Jan Karcher <jaka@linux.ibm.com> -R: D. 
Wythe <alibuda@linux.alibaba.com> +R: Mahanta Jambigi <mjambigi@linux.ibm.com> R: Tony Lu <tonylu@linux.alibaba.com> R: Wen Gu <guwen@linux.alibaba.com> L: linux-rdma@vger.kernel.org @@ -22711,7 +22743,7 @@ K: fu[57]40 K: [^@]sifive SILEAD TOUCHSCREEN DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org L: platform-driver-x86@vger.kernel.org S: Maintained @@ -22744,7 +22776,7 @@ F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c SIMPLEFB FB DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -22873,7 +22905,7 @@ F: Documentation/hwmon/emc2103.rst F: drivers/hwmon/emc2103.c SMSC SCH5627 HARDWARE MONITOR DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-hwmon@vger.kernel.org S: Supported F: Documentation/hwmon/sch5627.rst @@ -23528,7 +23560,7 @@ S: Supported F: Documentation/process/stable-kernel-rules.rst STAGING - ATOMISP DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Mauro Carvalho Chehab <mchehab@kernel.org> R: Sakari Ailus <sakari.ailus@linux.intel.com> L: linux-media@vger.kernel.org @@ -23824,7 +23856,7 @@ F: arch/m68k/sun3*/ F: drivers/net/ethernet/i825xx/sun3* SUN4I LOW RES ADC ATTACHED TABLET KEYS DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml @@ -24066,6 +24098,7 @@ M: Bin Du <bin.du@amd.com> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-amdisp.c +F: include/linux/soc/amd/isp4_misc.h SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung <jh80.chung@samsung.com> @@ -25030,8 +25063,11 @@ M: Hugh Dickins <hughd@google.com> R: 
Baolin Wang <baolin.wang@linux.alibaba.com> L: linux-mm@kvack.org S: Maintained +F: include/linux/memfd.h F: include/linux/shmem_fs.h +F: mm/memfd.c F: mm/shmem.c +F: mm/shmem_quota.c TOMOYO SECURITY MODULE M: Kentaro Takeda <takedakn@nttdata.co.jp> @@ -25592,7 +25628,7 @@ F: Documentation/hid/hiddev.rst F: drivers/hid/usbhid/ USB INTEL XHCI ROLE MUX DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -25783,7 +25819,7 @@ F: Documentation/firmware-guide/acpi/intel-pmc-mux.rst F: drivers/usb/typec/mux/intel_pmc_mux.c USB TYPEC PI3USB30532 MUX DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c @@ -25812,7 +25848,7 @@ F: drivers/usb/host/uhci* USB VIDEO CLASS M: Laurent Pinchart <laurent.pinchart@ideasonboard.com> -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-media@vger.kernel.org S: Maintained W: http://www.ideasonboard.org/uvc/ @@ -26343,7 +26379,7 @@ F: include/uapi/linux/virtio_snd.h F: sound/virtio/* VIRTUAL BOX GUEST DEVICE DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Arnd Bergmann <arnd@arndb.de> M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> S: Maintained @@ -26352,7 +26388,7 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h VIRTUAL BOX SHARED FOLDER VFS DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/vboxsf/* @@ -26606,7 +26642,7 @@ F: drivers/mmc/host/wbsd.* WACOM PROTOCOL 4 SERIAL TABLETS M: Julian Squires <julian@cipht.net> -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: linux-input@vger.kernel.org S: Maintained F: drivers/input/tablet/wacom_serial4.c @@ -26773,7 +26809,7 @@ F: include/linux/wwan.h F: 
include/uapi/linux/wwan.h X-POWERS AXP288 PMIC DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> S: Maintained F: drivers/acpi/pmic/intel_pmic_xpower.c N: axp288 @@ -26865,14 +26901,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm F: arch/x86/mm/ X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git F: drivers/platform/x86/x86-android-tablets/ X86 PLATFORM DRIVERS -M: Hans de Goede <hdegoede@redhat.com> +M: Hans de Goede <hansg@kernel.org> M: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> L: platform-driver-x86@vger.kernel.org S: Maintained @@ -26903,7 +26939,7 @@ F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c X86 TRUST DOMAIN EXTENSIONS (TDX) -M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> +M: Kirill A. 
Shutemov <kas@kernel.org> R: Dave Hansen <dave.hansen@linux.intel.com> L: x86@kernel.org L: linux-coco@lists.linux.dev @@ -27272,13 +27308,6 @@ S: Supported W: http://www.marvell.com F: drivers/i2c/busses/i2c-xlp9xx.c -XRA1403 GPIO EXPANDER -M: Nandor Han <nandor.han@ge.com> -L: linux-gpio@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/gpio/gpio-xra1403.txt -F: drivers/gpio/gpio-xra1403.c - XTENSA XTFPGA PLATFORM SUPPORT M: Max Filippov <jcmvbkbc@gmail.com> S: Maintained @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 55fc331af337..393d71124f5d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,7 @@ config ARM64 select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING + select JUMP_LABEL select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MODULES_USE_ELF_RELA diff --git a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi index 3df2fd3993b5..9740fbf200f0 100644 --- a/arch/arm64/boot/dts/apple/spi1-nvram.dtsi +++ b/arch/arm64/boot/dts/apple/spi1-nvram.dtsi @@ -20,8 +20,6 @@ compatible = "jedec,spi-nor"; reg = <0x0>; spi-max-frequency = <25000000>; - #address-cells = <1>; - #size-cells = <1>; partitions { compatible = "fixed-partitions"; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index e2d9439397f7..5b3c42e9f0e6 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -100,6 +100,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j293-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi index 8e82231acab5..0c8206156bfe 100644 --- 
a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi +++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi @@ -71,7 +71,7 @@ */ &port00 { bus-range = <1 1>; - wifi0: network@0,0 { + wifi0: wifi@0,0 { compatible = "pci14e4,4425"; reg = <0x10000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 20faf0c0d809..3a204845b85b 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -405,8 +405,6 @@ compatible = "apple,t8103-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index be86d34c6696..fb8ad7d4c65a 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -63,6 +63,8 @@ &displaydfr_mipi { status = "okay"; + #address-cells = <1>; + #size-cells = <0>; dfr_panel: panel@0 { compatible = "apple,j493-summit", "apple,summit"; diff --git a/arch/arm64/boot/dts/apple/t8112.dtsi b/arch/arm64/boot/dts/apple/t8112.dtsi index e95711d8337f..f68354194355 100644 --- a/arch/arm64/boot/dts/apple/t8112.dtsi +++ b/arch/arm64/boot/dts/apple/t8112.dtsi @@ -420,8 +420,6 @@ compatible = "apple,t8112-display-pipe-mipi", "apple,h7-display-pipe-mipi"; reg = <0x2 0x28600000 0x0 0x100000>; power-domains = <&ps_mipi_dsi>; - #address-cells = <1>; - #size-cells = <0>; status = "disabled"; ports { diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 897fc686e6a9..7e04a2905ce4 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1573,6 +1573,7 @@ CONFIG_RESET_QCOM_AOSS=y CONFIG_RESET_QCOM_PDC=m CONFIG_RESET_RZG2L_USBPHY_CTRL=y CONFIG_RESET_TI_SCI=y +CONFIG_PHY_SNPS_EUSB2=m CONFIG_PHY_XGENE=y CONFIG_PHY_CAN_TRANSCEIVER=m CONFIG_PHY_NXP_PTN3222=m @@ 
-1597,7 +1598,6 @@ CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m CONFIG_PHY_QCOM_QUSB2=m -CONFIG_PHY_QCOM_SNPS_EUSB2=m CONFIG_PHY_QCOM_EUSB2_REPEATER=m CONFIG_PHY_QCOM_M31_USB=m CONFIG_PHY_QCOM_USB_HS=m diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ba5df0df02a4..9f38340d24c2 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -287,17 +287,6 @@ .Lskip_fgt2_\@: .endm -.macro __init_el2_gcs - mrs_s x1, SYS_ID_AA64PFR1_EL1 - ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4 - cbz x1, .Lskip_gcs_\@ - - /* Ensure GCS is not enabled when we start trying to do BLs */ - msr_s SYS_GCSCR_EL1, xzr - msr_s SYS_GCSCRE0_EL1, xzr -.Lskip_gcs_\@: -.endm - /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -319,7 +308,6 @@ __init_el2_cptr __init_el2_fgt __init_el2_fgt2 - __init_el2_gcs .endm #ifndef __KVM_NVHE_HYPERVISOR__ @@ -371,6 +359,13 @@ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 .Lskip_mpam_\@: + check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2 + +.Linit_gcs_\@: + msr_s SYS_GCSCR_EL1, xzr + msr_s SYS_GCSCRE0_EL1, xzr + +.Lskip_gcs_\@: check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index bd020fc28aa9..0720898f563e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) -#define __build_check_all_or_none(r, bits) \ - BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) - -#define __cpacr_to_cptr_clr(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((set) & CPACR_EL1_FPEN) \ - cptr |= 
CPTR_EL2_TFP; \ - if ((set) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((set) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((clr) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((clr) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((clr) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define __cpacr_to_cptr_set(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((clr) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((clr) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((clr) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((set) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((set) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((set) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define cpacr_clear_set(clr, set) \ - do { \ - BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \ - BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \ - __build_check_all_or_none((clr), CPACR_EL1_FPEN); \ - __build_check_all_or_none((set), CPACR_EL1_FPEN); \ - __build_check_all_or_none((clr), CPACR_EL1_ZEN); \ - __build_check_all_or_none((set), CPACR_EL1_ZEN); \ - __build_check_all_or_none((clr), CPACR_EL1_SMEN); \ - __build_check_all_or_none((set), CPACR_EL1_SMEN); \ - \ - if (has_vhe() || has_hvhe()) \ - sysreg_clear_set(cpacr_el1, clr, set); \ - else \ - sysreg_clear_set(cptr_el2, \ - __cpacr_to_cptr_clr(clr, set), \ - __cpacr_to_cptr_set(clr, set));\ - } while (0) - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5ccca509dff1..3e41a880b062 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1289,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm); }) /* - * The couple of isb() below are there to guarantee the same behaviour - * on VHE as on !VHE, where the eret to EL1 acts as a context - * synchronization event. 
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE, + * where the eret to EL1 acts as a context synchronization event. */ #define kvm_call_hyp(f, ...) \ do { \ @@ -1309,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ if (has_vhe()) { \ ret = f(__VA_ARGS__); \ - isb(); \ } else { \ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ @@ -1482,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2920b0a51403..a2faf0049dab 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idle.o patching.o pi/ \ - rsi.o + rsi.o jump_label.o obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o @@ -47,7 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o -obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_EFI) += efi.o efi-rt-wrapper.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b34044e20128..e151585c6cca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3135,6 +3135,13 @@ static bool has_sve_feature(const struct arm64_cpu_capabilities *cap, int scope) } #endif +#ifdef CONFIG_ARM64_SME +static bool has_sme_feature(const struct arm64_cpu_capabilities *cap, int scope) +{ + return 
system_supports_sme() && has_user_cpuid_feature(cap, scope); +} +#endif + static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), @@ -3223,31 +3230,31 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), - HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), - HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), - HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), - HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), - HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), - HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, 
CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), - HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), - HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), - HWCAP_CAP(ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), - HWCAP_CAP(ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p2, CAP_HWCAP, KERNEL_HWCAP_SME2P2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), + 
HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, STMOP, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_STMOP), + HWCAP_CAP_MATCH_ID(has_sme_feature, ID_AA64SMFR0_EL1, SMOP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SMOP4), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3857fd7ee8d4..62230d6dd919 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -15,6 +15,7 @@ #include <asm/efi.h> #include <asm/stacktrace.h> +#include <asm/vmap_stack.h> static bool region_is_misaligned(const efi_memory_desc_t *md) { @@ -214,9 +215,13 @@ static int __init arm64_efi_rt_init(void) if (!efi_enabled(EFI_RUNTIME_SERVICES)) return 0; - p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, - NUMA_NO_NODE, &&l); -l: if (!p) { + if (!IS_ENABLED(CONFIG_VMAP_STACK)) { + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + p = 
arch_alloc_vmap_stack(THREAD_SIZE, NUMA_NO_NODE); + if (!p) { pr_warn("Failed to allocate EFI runtime stack\n"); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return -ENOMEM; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a5ca15daeb8a..08b7042a2e2d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -288,7 +288,9 @@ static void flush_gcs(void) if (!system_supports_gcs()) return; - gcs_free(current); + current->thread.gcspr_el0 = 0; + current->thread.gcs_base = 0; + current->thread.gcs_size = 0; current->thread.gcs_el0_mode = 0; write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); write_sysreg_s(0, SYS_GCSPR_EL0); @@ -671,6 +673,11 @@ static void permission_overlay_switch(struct task_struct *next) current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0); if (current->thread.por_el0 != next->thread.por_el0) { write_sysreg_s(next->thread.por_el0, SYS_POR_EL0); + /* + * No ISB required as we can tolerate spurious Overlay faults - + * the fault handler will check again based on the new value + * of POR_EL0. 
+ */ } } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a360e52db02f..ee94b72bf8fb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; + return READ_ONCE_NOCHECK(*addr); else return 0; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b3f6b56e733..21a795303568 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1143,7 +1143,7 @@ static inline unsigned int num_other_online_cpus(void) void smp_send_stop(void) { static unsigned long stop_in_progress; - cpumask_t mask; + static cpumask_t mask; unsigned long timeout; /* diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index de2b4e9c9f9f..23dd3f3fc3eb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - ret = kvm_arch_vcpu_run_map_fp(vcpu); - if (ret) - return ret; - if (likely(vcpu_has_run_once(vcpu))) return 0; @@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard) static void cpu_hyp_uninit(void *discard) { - if (__this_cpu_read(kvm_hyp_initialized)) { + if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reset(); __this_cpu_write(kvm_hyp_initialized, 0); } @@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { + if (per_cpu(kvm_hyp_initialized, cpu)) + continue; + free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT); - free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + + if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]) + continue; if (free_sve) { struct cpu_sve_state *sve_state; @@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void) sve_state = 
per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state; free_pages((unsigned long) sve_state, pkvm_host_sve_state_order()); } + + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); + } } @@ -2764,7 +2768,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 8f6c8f57c6b9..15e17aca1dec 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -15,32 +15,6 @@ #include <asm/sysreg.h> /* - * Called on entry to KVM_RUN unless this vcpu previously ran at least - * once and the most recent prior KVM_RUN for this vcpu was called from - * the same task as current (highly likely). - * - * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), - * such that on entering hyp the relevant parts of current are already - * mapped. - */ -int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) -{ - struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state; - int ret; - - /* pKVM has its own tracking of the host fpsimd state. */ - if (is_protected_kvm_enabled()) - return 0; - - /* Make sure the host task fpsimd state is visible to hyp: */ - ret = kvm_share_hyp(fpsimd, fpsimd + 1); - if (ret) - return ret; - - return 0; -} - -/* * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. * The actual loading is done by the FPSIMD access trap taken to hyp. 
* diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 76dfda116e56..2ad57b117385 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } +static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA; + + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ + val |= CPTR_EL2_TSM; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); +} + +static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA; + u64 cptr; + + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. 
+ */ + if (is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. + */ + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; + + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_EL1_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + +write: + write_sysreg(val, cpacr_el1); +} + +static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + + if (has_vhe() || has_hvhe()) + __activate_cptr_traps_vhe(vcpu); + else + __activate_cptr_traps_nvhe(vcpu); +} + +static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); +} + +static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); +} + +static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || has_hvhe()) + __deactivate_cptr_traps_vhe(vcpu); + else + __deactivate_cptr_traps_nvhe(vcpu); +} + #define reg_to_fgt_masks(reg) \ ({ \ 
struct fgt_masks *m; \ @@ -486,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) */ if (system_supports_sve()) { __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - } else { __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); } @@ -541,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ /* First disable enough traps to allow us to update the registers */ - if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); - else - cpacr_clear_set(0, CPACR_EL1_FPEN); + __deactivate_cptr_traps(vcpu); isb(); /* Write out the host state if it's in the registers */ @@ -566,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; + /* + * Re-enable traps necessary for the current state of the guest, e.g. + * those enabled by a guest hypervisor. The ERET to the guest will + * provide the necessary context synchronization. + */ + __activate_cptr_traps(vcpu); + return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e9198e56e784..3206b2c07f82 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + /* + * Traps have been disabled by __deactivate_cptr_traps(), but there + * hasn't necessarily been a context synchronization event yet. 
+ */ isb(); if (vcpu_has_sve(vcpu)) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 95d7534c9679..8957734d6183 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) { struct kvm_mem_range cur; kvm_pte_t pte; + u64 granule; s8 level; int ret; @@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return -EPERM; } - do { - u64 granule = kvm_granule_size(level); + for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) { + if (!kvm_level_supports_block_mapping(level)) + continue; + granule = kvm_granule_size(level); cur.start = ALIGN_DOWN(addr, granule); cur.end = cur.start + granule; - level++; - } while ((level <= KVM_PGTABLE_LAST_LEVEL) && - !(kvm_level_supports_block_mapping(level) && - range_included(&cur, range))); + if (!range_included(&cur, range)) + continue; + *range = cur; + return 0; + } - *range = cur; + WARN_ON(1); - return 0; + return -EINVAL; } int host_stage2_idmap_locked(phys_addr_t addr, u64 size, diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 73affe1333a4..0e752b515d0f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks; extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); - - if (has_hvhe()) { - val |= CPACR_EL1_TTA; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } - - write_sysreg(val, cpacr_el1); - } else { - val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - - /* - * Always trap SME since it's not supported in KVM. 
- * TSM is RES1 if SME isn't implemented. - */ - val |= CPTR_EL2_TSM; - - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - - if (!guest_owns_fp_regs()) - val |= CPTR_EL2_TFP; - - write_sysreg(val, cptr_el2); - } -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - if (has_hvhe()) { - u64 val = CPACR_EL1_FPEN; - - if (cpus_have_final_cap(ARM64_SVE)) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - - write_sysreg(val, cpacr_el1); - } else { - u64 val = CPTR_NVHE_EL2_RES1; - - if (!cpus_have_final_cap(ARM64_SVE)) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - - write_sysreg(val, cptr_el2); - } -} - static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 09df2b42bc1b..477f1580ffea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE); } -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 cptr; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. 
- */ - u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - __activate_traps_fpsimd32(vcpu); - } - - if (!vcpu_has_nv(vcpu)) - goto write; - - /* - * The architecture is a bit crap (what a surprise): an EL2 guest - * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, - * as they are RES0 in the guest's view. To work around it, trap the - * sucker using the very same bit it can't set... - */ - if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) - val |= CPTR_EL2_TCPAC; - - /* - * Layer the guest hypervisor's trap configuration on top of our own if - * we're in a nested context. - */ - if (is_hyp_ctxt(vcpu)) - goto write; - - cptr = vcpu_sanitised_cptr_el2(vcpu); - - /* - * Pay attention, there's some interesting detail here. - * - * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two - * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): - * - * - CPTR_EL2.xEN = x0, traps are enabled - * - CPTR_EL2.xEN = x1, traps are disabled - * - * In other words, bit[0] determines if guest accesses trap or not. In - * the interest of simplicity, clear the entire field if the guest - * hypervisor has traps enabled to dispel any illusion of something more - * complicated taking place. 
- */ - if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_FPEN; - if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_ZEN; - - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - val |= cptr & CPACR_EL1_E0POE; - - val |= cptr & CPTR_EL2_TCPAC; - -write: - write_sysreg(val, cpacr_el1); -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; - - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - - write_sysreg(val, cpacr_el1); -} - static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -639,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - sysreg_save_host_state_vhe(host_ctxt); - fpsimd_lazy_switch_to_guest(vcpu); + sysreg_save_host_state_vhe(host_ctxt); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -667,15 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); - fpsimd_lazy_switch_to_host(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); + __debug_switch_to_host(vcpu); + + /* + * Ensure that all system register writes above have taken effect + * before returning to the host. In VHE mode, CPTR traps for + * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be + * manipulated after the ISB. + */ + isb(); + + fpsimd_lazy_switch_to_host(vcpu); + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); - __debug_switch_to_host(vcpu); - return exit_code; } NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); @@ -705,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. 
We rely on the isb() in kvm_call_hyp*() - * to make sure these changes take effect before running the host or - * additional guests. - */ return ret; } diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 5b191f4dc566..dc1d26559bfa 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) } } +#define has_tgran_2(__r, __sz) \ + ({ \ + u64 _s1, _s2, _mmfr0 = __r; \ + \ + _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz##_2, _mmfr0); \ + \ + _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \ + TGRAN##__sz, _mmfr0); \ + \ + ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \ + _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \ + (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \ + _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \ + }) /* * Our emulated CPU doesn't support all the possible features. For the * sake of simplicity (and probably mental sanity), wipe out a number @@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu) */ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) { + u64 orig_val = val; + switch (reg) { case SYS_ID_AA64ISAR0_EL1: /* Support everything but TME */ @@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val) */ switch (PAGE_SIZE) { case SZ_4K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); + if (has_tgran_2(orig_val, 4)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP); fallthrough; case SZ_16K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); + if (has_tgran_2(orig_val, 16)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP); fallthrough; case SZ_64K: - val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); + if (has_tgran_2(orig_val, 64)) + val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP); break; } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index d22a8ad7bcc5..679aafe77de2 
100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -36,6 +36,11 @@ struct shadow_if { static DEFINE_PER_CPU(struct shadow_if, shadow_if); +static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx) +{ + return hweight16(shadow_if->lr_map & (BIT(idx) - 1)); +} + /* * Nesting GICv3 support * @@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) return reg; } +static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr) +{ + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW)) + return lr; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + /* If there was no real mapping, nuke the HW bit */ + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) + lr &= ~ICH_LR_HW; + + /* Translate the virtual mapping to the real one, even if invalid */ + if (irq) { + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + vgic_put_irq(vcpu->kvm, irq); + } + + return lr; +} + /* * For LRs which have HW bit set such as timer interrupts, we modify them to * have the host hardware interrupt number instead of the virtual one programmed @@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, struct vgic_v3_cpu_if *s_cpu_if) { - unsigned long lr_map = 0; - int index = 0; + struct shadow_if *shadow_if; + + shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif); + shadow_if->lr_map = 0; for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); - struct vgic_irq *irq; if (!(lr & ICH_LR_STATE)) - lr = 0; - - if (!(lr & ICH_LR_HW)) - goto next; - - /* We have the HW bit set, check for validity of pINTID */ - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); - if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { - /* There was no real mapping, so nuke the HW bit */ - lr &= ~ICH_LR_HW; - 
if (irq) - vgic_put_irq(vcpu->kvm, irq); - goto next; - } - - /* Translate the virtual mapping to the real one */ - lr &= ~ICH_LR_PHYS_ID_MASK; - lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + continue; - vgic_put_irq(vcpu->kvm, irq); + lr = translate_lr_pintid(vcpu, lr); -next: - s_cpu_if->vgic_lr[index] = lr; - if (lr) { - lr_map |= BIT(i); - index++; - } + s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr; + shadow_if->lr_map |= BIT(i); } - container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; - s_cpu_if->used_lrs = index; + s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); } void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) { struct shadow_if *shadow_if = get_shadow_if(); - int i, index = 0; + int i; for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); struct vgic_irq *irq; if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) - goto next; + continue; /* * If we had a HW lr programmed by the guest hypervisor, we @@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) */ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ - goto next; + continue; - lr = __gic_v3_get_lr(index); + lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); if (!(lr & ICH_LR_STATE)) irq->active = false; vgic_put_irq(vcpu->kvm, irq); - next: - index++; } } @@ -368,13 +373,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); val &= ~ICH_LR_STATE; - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); - s_cpu_if->vgic_lr[i] = 0; } - shadow_if->lr_map = 0; vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; } @@ -398,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) { bool level; - level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; - if (level) 
- level &= vgic_v3_get_misr(vcpu); + level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu); kvm_vgic_inject_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.mi_intid, level, vcpu); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ec0a337891dd..11eb8d1adc84 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -487,17 +487,29 @@ static void do_bad_area(unsigned long far, unsigned long esr, } } -static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, - unsigned int mm_flags) +static bool fault_from_pkey(struct vm_area_struct *vma, unsigned int mm_flags) { - unsigned long iss2 = ESR_ELx_ISS2(esr); - if (!system_supports_poe()) return false; - if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay)) - return true; - + /* + * We do not check whether an Overlay fault has occurred because we + * cannot make a decision based solely on its value: + * + * - If Overlay is set, a fault did occur due to POE, but it may be + * spurious in those cases where we update POR_EL0 without ISB (e.g. + * on context-switch). We would then need to manually check POR_EL0 + * against vma_pkey(vma), which is exactly what + * arch_vma_access_permitted() does. + * + * - If Overlay is not set, we may still need to report a pkey fault. + * This is the case if an access was made within a mapping but with no + * page mapped, and POR_EL0 forbids the access (according to + * vma_pkey()). Such access will result in a SIGSEGV regardless + * because core code checks arch_vma_access_permitted(), but in order + * to report the correct error code - SEGV_PKUERR - we must handle + * that case here. 
+ */ return !arch_vma_access_permitted(vma, mm_flags & FAULT_FLAG_WRITE, mm_flags & FAULT_FLAG_INSTRUCTION, @@ -635,7 +647,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); vma_end_read(vma); fault = 0; @@ -679,7 +691,7 @@ retry: goto bad_area; } - if (fault_from_pkey(esr, vma, mm_flags)) { + if (fault_from_pkey(vma, mm_flags)) { pkey = vma_pkey(vma); mmap_read_unlock(mm); fault = 0; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 80d470aa469d..54dccfd6aa11 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -518,7 +518,6 @@ alternative_else_nop_endif msr REG_PIR_EL1, x0 orr tcr2, tcr2, TCR2_EL1_PIE - msr REG_TCR2_EL1, x0 .Lskip_indirection: diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index fe198b473f84..e739dbc6329d 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -18,12 +18,12 @@ /* * This gives the physical RAM offset. 
*/ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef PHYS_OFFSET #define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #ifndef IO_BASE #define IO_BASE CSR_DMW0_BASE @@ -66,7 +66,7 @@ extern unsigned long vm_map_base; #define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ATYPE_ #define _ATYPE32_ #define _ATYPE64_ @@ -85,7 +85,7 @@ extern unsigned long vm_map_base; /* * 32/64-bit LoongArch address spaces */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ACAST32_ #define _ACAST64_ #else diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h index ff3d10ac393f..7dc29bd9b2f0 100644 --- a/arch/loongarch/include/asm/alternative-asm.h +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_ASM_H #define _ASM_ALTERNATIVE_ASM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #include <asm/asm.h> @@ -77,6 +77,6 @@ .previous .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h index cee7b29785ab..b5bae21fb3c8 100644 --- a/arch/loongarch/include/asm/alternative.h +++ b/arch/loongarch/include/asm/alternative.h @@ -2,7 +2,7 @@ #ifndef _ASM_ALTERNATIVE_H #define _ASM_ALTERNATIVE_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> #include <linux/stddef.h> @@ -106,6 +106,6 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ (asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ALTERNATIVE_H */ diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h 
index df05005f2b80..d60bdf2e6377 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -7,7 +7,7 @@ #define EX_TYPE_UACCESS_ERR_ZERO 2 #define EX_TYPE_BPF 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ @@ -22,7 +22,7 @@ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #include <linux/bits.h> #include <linux/stringify.h> @@ -60,6 +60,6 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h index f591b3245def..f018d26fc995 100644 --- a/arch/loongarch/include/asm/asm.h +++ b/arch/loongarch/include/asm/asm.h @@ -110,7 +110,7 @@ #define LONG_SRA srai.w #define LONG_SRAV sra.w -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .word #endif #define LONGSIZE 4 @@ -131,7 +131,7 @@ #define LONG_SRA srai.d #define LONG_SRAV sra.d -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define LONG .dword #endif #define LONGSIZE 8 @@ -158,7 +158,7 @@ #define PTR_SCALESHIFT 2 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .word #endif #define PTRSIZE 4 @@ -181,7 +181,7 @@ #define PTR_SCALESHIFT 3 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define PTR .dword #endif #define PTRSIZE 8 diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 98cf4d7b4b0a..dfb982fe8701 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -46,7 +46,7 @@ #define PRID_PRODUCT_MASK 0x0fff -#if !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLER__) enum cpu_type_enum { CPU_UNKNOWN, @@ -55,7 +55,7 @@ enum cpu_type_enum { CPU_LAST }; -#endif /* !__ASSEMBLY */ +#endif /* !__ASSEMBLER__ */ /* * ISA Level encodings diff --git 
a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index 6e0a99763a9a..f4caaf764f9e 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -14,7 +14,7 @@ #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifndef CONFIG_DYNAMIC_FTRACE @@ -84,7 +84,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h index 996038da806d..af95b941f48b 100644 --- a/arch/loongarch/include/asm/gpr-num.h +++ b/arch/loongarch/include/asm/gpr-num.h @@ -2,7 +2,7 @@ #ifndef __ASM_GPR_NUM_H #define __ASM_GPR_NUM_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .equ .L__gpr_num_zero, 0 .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 @@ -25,7 +25,7 @@ .equ .L__gpr_num_$s\num, 23 + \num .endr -#else /* __ASSEMBLY__ */ +#else /* __ASSEMBLER__ */ #define __DEFINE_ASM_GPR_NUMS \ " .equ .L__gpr_num_zero, 0\n" \ @@ -47,6 +47,6 @@ " .equ .L__gpr_num_$s\\num, 23 + \\num\n" \ " .endr\n" \ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 003172b8406b..620163628a7f 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -5,7 +5,7 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/compiler.h> #include <linux/stringify.h> @@ -80,6 +80,6 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } -#endif /* #ifndef __ASSEMBLY__ */ +#endif /* #ifndef __ASSEMBLER__ */ #endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/jump_label.h 
b/arch/loongarch/include/asm/jump_label.h index 8a924bd69d19..4000c7603d8e 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -7,7 +7,7 @@ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/types.h> @@ -50,5 +50,5 @@ l_yes: return true; } -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index 7f52bd31b9d4..62f139a9c87d 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/linkage.h> #include <linux/mmzone.h> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index d84dac88a584..a0994d226eff 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -9,15 +9,15 @@ #include <linux/linkage.h> #include <linux/types.h> -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <larchintrin.h> /* CPUCFG */ #define read_cpucfg(reg) __cpucfg(reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* LoongArch Registers */ #define REG_ZERO 0x0 @@ -53,7 +53,7 @@ #define REG_S7 0x1e #define REG_S8 0x1f -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Bit fields for CPUCFG registers */ #define LOONGARCH_CPUCFG0 0x0 @@ -171,7 +171,7 @@ * SW emulation for KVM hypervirsor, see arch/loongarch/include/uapi/asm/kvm_para.h */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* CSR */ #define csr_read32(reg) __csrrd_w(reg) @@ -187,7 +187,7 @@ #define iocsr_write32(val, reg) __iocsrwr_w(val, reg) #define iocsr_write64(val, reg) __iocsrwr_d(val, reg) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* CSR register number */ @@ -1195,7 +1195,7 @@ #define 
LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 #define IOCSR_EXTIOI_VECTOR_NUM 256 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ static __always_inline u64 drdtime(void) { @@ -1357,7 +1357,7 @@ __BUILD_CSR_OP(tlbidx) #define clear_csr_estat(val) \ csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ /* Generic EntryLo bit definitions */ #define ENTRYLO_V (_ULCAST_(1) << 0) diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/arch/loongarch/include/asm/orc_types.h +++ b/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 7368f12b7cb1..a3aaf34fba16 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -15,7 +15,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/kernel.h> #include <linux/pfn.h> @@ -110,6 +110,6 @@ extern int __virt_addr_valid(volatile void *kaddr); #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 45bfc65a0c9f..7bbfb04a54cc 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -92,7 +92,7 @@ #define PAGE_KERNEL_WUC 
__pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) @@ -127,6 +127,6 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index b30185302c07..f2aeff544cee 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -55,7 +55,7 @@ #define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <linux/mm_types.h> #include <linux/mmzone.h> @@ -618,6 +618,6 @@ static inline long pmd_protnone(pmd_t pmd) #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h index 1672262a5e2e..0b168cdaae9a 100644 --- a/arch/loongarch/include/asm/prefetch.h +++ b/arch/loongarch/include/asm/prefetch.h @@ -8,7 +8,7 @@ #define Pref_Load 0 #define Pref_Store 8 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro __pref hint addr #ifdef CONFIG_CPU_HAS_PREFETCH diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index ad0bd234a0f1..3a47f52959a8 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -39,7 +39,7 @@ int loongson_cpu_disable(void); void loongson_cpu_die(unsigned int cpu); #endif -static inline void plat_smp_setup(void) +static inline void __init plat_smp_setup(void) { loongson_smp_setup(); } diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 4f5a9441754e..9dfa2ef00816 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ 
b/arch/loongarch/include/asm/thread_info.h @@ -10,7 +10,7 @@ #ifdef __KERNEL__ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/processor.h> @@ -53,7 +53,7 @@ static inline struct thread_info *current_thread_info(void) register unsigned long current_stack_pointer __asm__("$sp"); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* thread information allocation */ #define THREAD_SIZE SZ_16K diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h index baf15a0dcf8b..0edd731f3d6a 100644 --- a/arch/loongarch/include/asm/types.h +++ b/arch/loongarch/include/asm/types.h @@ -8,7 +8,7 @@ #include <asm-generic/int-ll64.h> #include <uapi/asm/types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #define _ULCAST_ #define _U64CAST_ #else diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h index 2c68bc72736c..16c7f7e465a0 100644 --- a/arch/loongarch/include/asm/unwind_hints.h +++ b/arch/loongarch/include/asm/unwind_hints.h @@ -5,7 +5,7 @@ #include <linux/objtool.h> #include <asm/orc_types.h> -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro UNWIND_HINT_UNDEFINED UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED @@ -23,7 +23,7 @@ UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL .endm -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #define UNWIND_HINT_SAVE \ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0) @@ -31,6 +31,6 @@ #define UNWIND_HINT_RESTORE \ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0) -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */ diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h index 322d0a5f1c84..395ec223bcbe 100644 --- a/arch/loongarch/include/asm/vdso/arch_data.h +++ b/arch/loongarch/include/asm/vdso/arch_data.h @@ -7,7 +7,7 @@ #ifndef _VDSO_ARCH_DATA_H #define _VDSO_ARCH_DATA_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include 
<asm/vdso.h> @@ -20,6 +20,6 @@ struct vdso_arch_data { struct vdso_pcpu_data pdata[NR_CPUS]; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h index a81724b69f29..2ff05003c6e7 100644 --- a/arch/loongarch/include/asm/vdso/getrandom.h +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_GETRANDOM_H #define __ASM_VDSO_GETRANDOM_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -28,6 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index f15503e3336c..dcafabca9bb6 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -7,7 +7,7 @@ #ifndef __ASM_VDSO_GETTIMEOFDAY_H #define __ASM_VDSO_GETTIMEOFDAY_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/unistd.h> #include <asm/vdso/vdso.h> @@ -89,6 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void) } #define __arch_vdso_hres_capable loongarch_vdso_hres_capable -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h index ef5770b343a0..1e255373b0b8 100644 --- a/arch/loongarch/include/asm/vdso/processor.h +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -5,10 +5,10 @@ #ifndef __ASM_VDSO_PROCESSOR_H #define __ASM_VDSO_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define cpu_relax() barrier() -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h 
b/arch/loongarch/include/asm/vdso/vdso.h index 50c65fb29daf..04bd2d452876 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -7,7 +7,7 @@ #ifndef _ASM_VDSO_VDSO_H #define _ASM_VDSO_VDSO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <asm/asm.h> #include <asm/page.h> @@ -16,6 +16,6 @@ #define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 1140b54b4bc8..558eb9dfda52 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -2,13 +2,13 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include <vdso/datapage.h> /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index a54cd6fd3796..1367ca759468 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <linux/efi-bgrt.h> +#include <linux/export.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/memblock.h> diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index 4ad13847e962..0e0c766df1e3 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <linux/export.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/alternative.h> diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index de21e72759ee..860a3bc030e0 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -144,6 
+144,18 @@ void __init efi_init(void) if (efi_memmap_init_early(&data) < 0) panic("Unable to map EFI memory map.\n"); + /* + * Reserve the physical memory region occupied by the EFI + * memory map table (header + descriptors). This is crucial + * for kdump, as the kdump kernel relies on this original + * memmap passed by the bootloader. Without reservation, + * this region could be overwritten by the primary kernel. + * Also, set the EFI_PRESERVE_BS_REGIONS flag to indicate that + * critical boot services code/data regions like this are preserved. + */ + memblock_reserve((phys_addr_t)boot_memmap, sizeof(*tbl) + data.size); + set_bit(EFI_PRESERVE_BS_REGIONS, &efi.flags); + early_memunmap(tbl, sizeof(*tbl)); } diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c index 0fa81ced28dc..3d98c6aa00db 100644 --- a/arch/loongarch/kernel/elf.c +++ b/arch/loongarch/kernel/elf.c @@ -6,7 +6,6 @@ #include <linux/binfmts.h> #include <linux/elf.h> -#include <linux/export.h> #include <linux/sched.h> #include <asm/cpu-features.h> diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 4c476904227f..141b49bd989c 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -4,6 +4,7 @@ */ #include <linux/cpu.h> +#include <linux/export.h> #include <linux/init.h> #include <asm/fpu.h> #include <asm/smp.h> diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c index e5a39bbad078..b1b51f920b23 100644 --- a/arch/loongarch/kernel/paravirt.c +++ b/arch/loongarch/kernel/paravirt.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/export.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/irq_work.h> diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index bc75a3a69fc8..367906b10f81 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -102,7 +102,7 @@ static int constant_timer_next_event(unsigned long delta, struct 
clock_event_dev return 0; } -static unsigned long __init get_loops_per_jiffy(void) +static unsigned long get_loops_per_jiffy(void) { unsigned long lpj = (unsigned long)const_clock_freq; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 47fc2de6d150..3d9be6ca7ec5 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/kexec.h> #include <linux/module.h> +#include <linux/export.h> #include <linux/extable.h> #include <linux/mm.h> #include <linux/sched/mm.h> diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index 98379b7d4147..08d7951b2f60 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <asm/unwind.h> +#include <linux/export.h> unsigned long unwind_get_return_address(struct unwind_state *state) { diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index d623935a7547..0005be49b056 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/objtool.h> +#include <linux/export.h> #include <linux/module.h> +#include <linux/objtool.h> #include <linux/sort.h> #include <asm/exception.h> #include <asm/orc_header.h> diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 929ae240280a..729e775bd40d 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -3,6 +3,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ #include <linux/cpumask.h> +#include <linux/export.h> #include <linux/ftrace.h> #include <linux/kallsyms.h> diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index f39929d7bf8a..a75f865d6fb9 100644 --- 
a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -9,7 +9,8 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) { - int ipnum, cpu, irq_index, irq_mask, irq; + int ipnum, cpu, cpuid, irq_index, irq_mask, irq; + struct kvm_vcpu *vcpu; for (irq = 0; irq < EIOINTC_IRQS; irq++) { ipnum = s->ipmap.reg_u8[irq / 32]; @@ -20,7 +21,12 @@ static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s) irq_index = irq / 32; irq_mask = BIT(irq & 0x1f); - cpu = s->coremap.reg_u8[irq]; + cpuid = s->coremap.reg_u8[irq]; + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask)) set_bit(irq, s->sw_coreisr[cpu][ipnum]); else @@ -66,20 +72,25 @@ static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level) } static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s, - int irq, void *pvalue, u32 len, bool notify) + int irq, u64 val, u32 len, bool notify) { - int i, cpu; - u64 val = *(u64 *)pvalue; + int i, cpu, cpuid; + struct kvm_vcpu *vcpu; for (i = 0; i < len; i++) { - cpu = val & 0xff; + cpuid = val & 0xff; val = val >> 8; if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) { - cpu = ffs(cpu) - 1; - cpu = (cpu >= 4) ? 0 : cpu; + cpuid = ffs(cpuid) - 1; + cpuid = (cpuid >= 4) ? 
0 : cpuid; } + vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid); + if (!vcpu) + continue; + + cpu = vcpu->vcpu_id; if (s->sw_coremap[irq + i] == cpu) continue; @@ -305,6 +316,11 @@ static int kvm_eiointc_read(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_read_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -398,7 +414,7 @@ static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq; s->coremap.reg_u8[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -436,17 +452,16 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, break; case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END: index = (offset - EIOINTC_ENABLE_START) >> 1; - old_data = s->enable.reg_u32[index]; + old_data = s->enable.reg_u16[index]; s->enable.reg_u16[index] = data; /* * 1: enable irq. * update irq when isr is set. */ data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index]; - index = index << 1; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 1); } /* * 0: disable irq. @@ -455,7 +470,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 2 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... 
EIOINTC_BOUNCE_END: @@ -484,7 +499,7 @@ static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 1; s->coremap.reg_u16[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -529,10 +544,9 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index]; - index = index << 2; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 1); } /* * 0: disable irq. @@ -541,7 +555,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 4 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -570,7 +584,7 @@ static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 2; s->coremap.reg_u32[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -615,10 +629,9 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, * update irq when isr is set. */ data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index]; - index = index << 3; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index + i, mask, 1); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 1); } /* * 0: disable irq. 
@@ -627,7 +640,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index]; for (i = 0; i < sizeof(data); i++) { u8 mask = (data >> (i * 8)) & 0xff; - eiointc_enable_irq(vcpu, s, index, mask, 0); + eiointc_enable_irq(vcpu, s, index * 8 + i, mask, 0); } break; case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END: @@ -656,7 +669,7 @@ static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu, irq = offset - EIOINTC_COREMAP_START; index = irq >> 3; s->coremap.reg_u64[index] = data; - eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true); + eiointc_update_sw_coremap(s, irq, data, sizeof(data), true); break; default: ret = -EINVAL; @@ -679,6 +692,11 @@ static int kvm_eiointc_write(struct kvm_vcpu *vcpu, return -EINVAL; } + if (addr & (len - 1)) { + kvm_err("%s: eiointc not aligned addr %llx len %d\n", __func__, addr, len); + return -EINVAL; + } + vcpu->kvm->stat.eiointc_write_exits++; spin_lock_irqsave(&eiointc->lock, flags); switch (len) { @@ -787,7 +805,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, int ret = 0; unsigned long flags; unsigned long type = (unsigned long)attr->attr; - u32 i, start_irq; + u32 i, start_irq, val; void __user *data; struct loongarch_eiointc *s = dev->kvm->arch.eiointc; @@ -795,8 +813,14 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (copy_from_user(&s->num_cpu, data, 4)) + if (copy_from_user(&val, data, 4)) ret = -EFAULT; + else { + if (val >= EIOINTC_ROUTE_MAX_VCPUS) + ret = -EINVAL; + else + s->num_cpu = val; + } break; case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE: if (copy_from_user(&s->features, data, 4)) @@ -809,7 +833,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, for (i = 0; i < (EIOINTC_IRQS / 4); i++) { start_irq = i * 4; eiointc_update_sw_coremap(s, start_irq, - (void *)&s->coremap.reg_u32[i], 
sizeof(u32), false); + s->coremap.reg_u32[i], sizeof(u32), false); } break; default: @@ -824,7 +848,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, bool is_write) { - int addr, cpuid, offset, ret = 0; + int addr, cpu, offset, ret = 0; unsigned long flags; void *p = NULL; void __user *data; @@ -832,7 +856,7 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, s = dev->kvm->arch.eiointc; addr = attr->attr; - cpuid = addr >> 16; + cpu = addr >> 16; addr &= 0xffff; data = (void __user *)attr->addr; switch (addr) { @@ -857,8 +881,11 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev, p = &s->isr.reg_u32[offset]; break; case EIOINTC_COREISR_START ... EIOINTC_COREISR_END: + if (cpu >= s->num_cpu) + return -EINVAL; + offset = (addr - EIOINTC_COREISR_START) / 4; - p = &s->coreisr.reg_u32[cpuid][offset]; + p = &s->coreisr.reg_u32[cpu][offset]; break; case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END: offset = (addr - EIOINTC_COREMAP_START) / 4; @@ -899,9 +926,15 @@ static int kvm_eiointc_sw_status_access(struct kvm_device *dev, data = (void __user *)attr->addr; switch (addr) { case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU: + if (is_write) + return ret; + p = &s->num_cpu; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE: + if (is_write) + return ret; + p = &s->features; break; case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE: diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c index b37cd8537b45..db22c2ec55e2 100644 --- a/arch/loongarch/lib/crc32-loongarch.c +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -11,6 +11,7 @@ #include <asm/cpu-features.h> #include <linux/crc32.h> +#include <linux/export.h> #include <linux/module.h> #include <linux/unaligned.h> diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index df309ae4045d..bcc9d01d8c41 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -2,6 +2,7 @@ // Copyright (C) 2019-2020 Arm 
Ltd. #include <linux/compiler.h> +#include <linux/export.h> #include <linux/kasan-checks.h> #include <linux/kernel.h> diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index 70ca73019811..df949a3d0f34 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -16,12 +16,12 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } -void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +void * __init early_memremap_ro(resource_size_t phys_addr, unsigned long size) { return early_memremap(phys_addr, size); } -void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, +void * __init early_memremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { return early_memremap(phys_addr, size); diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index 2726639150bc..5bc9627a6cf9 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -3,7 +3,6 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <linux/kernel.h> -#include <linux/export.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/types.h> diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index c4e4d2a9b460..b7eac4e56019 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -4,7 +4,7 @@ / { #size-cells = <0x02>; #address-cells = <0x02>; - model-name = "microwatt"; + model = "microwatt"; compatible = "microwatt-soc"; aliases { diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index e09b37d7489d..a89cb3139ca8 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -6,6 +6,7 @@ */ /dts-v1/; +#include <dt-bindings/interrupt-controller/irq.h> / { compatible = "fsl,mpc8315erdb"; @@ -358,6 +359,15 @@ interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <&gtm1>; }; + + gpio:
gpio-controller@c00 { + compatible = "fsl,mpc8314-gpio"; + reg = <0xc00 0x100>; + interrupts = <74 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&ipic>; + gpio-controller; + #gpio-cells = <2>; + }; }; pci0: pci@e0008500 { diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 02897f4b0dbf..b891910fce8a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -183,7 +183,7 @@ /* * Used to name C functions called from asm */ -#ifdef CONFIG_PPC_KERNEL_PCREL +#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL) #define CFUNC(name) name@notoc #else #define CFUNC(name) name diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index 2c145da3b774..b5211e413829 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -23,10 +23,10 @@ #define TCSETSW _IOW('t', 21, struct termios) #define TCSETSF _IOW('t', 22, struct termios) -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 83fe99861eb1..ca7f7bb2b478 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? 
*/ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index e8824f933326..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 -CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel ifdef CONFIG_CC_IS_CLANG # This flag is supported by clang for 64-bit but not 32-bit so it will cause # an unused command line flag warning for this file. diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 36061f4732b7..d71ea0f4466f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -63,7 +63,8 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_CFI_CLANG + # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2 + select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000 select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 438ce7df24c3..5bd5aae60d53 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1075,7 +1075,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ #ifdef CONFIG_64BIT #define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2) -#define TASK_SIZE_MAX LONG_MAX #ifdef CONFIG_COMPAT #define TASK_SIZE_32 (_AC(0x80000000, UL) - PAGE_SIZE) diff --git a/arch/riscv/include/asm/runtime-const.h 
b/arch/riscv/include/asm/runtime-const.h index 451fd76b8811..d766e2b9e6df 100644 --- a/arch/riscv/include/asm/runtime-const.h +++ b/arch/riscv/include/asm/runtime-const.h @@ -206,7 +206,7 @@ static inline void __runtime_fixup_32(__le16 *lui_parcel, __le16 *addi_parcel, u addi_insn_mask &= 0x07fff; } - if (lower_immediate & 0x00000fff) { + if (lower_immediate & 0x00000fff || lui_insn == RISCV_INSN_NOP4) { /* replace upper 12 bits of addi with lower 12 bits of val */ addi_insn &= addi_insn_mask; addi_insn |= (lower_immediate & 0x00000fff) << 20; diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index d472da4450e6..525e50db24f7 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -127,6 +127,7 @@ do { \ #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define __get_user_8(x, ptr, label) \ +do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ u32 __lo, __hi; \ asm_goto_output( \ @@ -141,7 +142,7 @@ do { \ : : label); \ (x) = (__typeof__(x))((__typeof__((x) - (x)))( \ (((u64)__hi << 32) | __lo))); \ - +} while (0) #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #define __get_user_8(x, ptr, label) \ do { \ diff --git a/arch/riscv/include/asm/vdso/getrandom.h b/arch/riscv/include/asm/vdso/getrandom.h index 8dc92441702a..c6d66895c1f5 100644 --- a/arch/riscv/include/asm/vdso/getrandom.h +++ b/arch/riscv/include/asm/vdso/getrandom.h @@ -18,7 +18,7 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns register unsigned int flags asm("a2") = _flags; asm volatile ("ecall\n" - : "+r" (ret) + : "=r" (ret) : "r" (nr), "r" (buffer), "r" (len), "r" (flags) : "memory"); diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 45c9b426fcc5..b61786d43c20 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -205,11 +205,11 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, THEAD_VSETVLI_T4X0E8M8D1 THEAD_VSB_V_V0T0 "add t0, 
t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VSB_V_V0T0 + THEAD_VSB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( @@ -241,11 +241,11 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ THEAD_VSETVLI_T4X0E8M8D1 THEAD_VLB_V_V0T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V8T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V16T0 "add t0, t0, t4\n\t" - THEAD_VLB_V_V0T0 + THEAD_VLB_V_V24T0 : : "r" (datap) : "memory", "t0", "t4"); } else { asm volatile ( diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index e6fbaaf54956..87d655944803 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -18,10 +18,10 @@ const struct cpu_operations cpu_ops_sbi; /* * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can - * be invoked from multiple threads in parallel. Define a per cpu data + * be invoked from multiple threads in parallel. Define an array of boot data * to handle that. 
*/ -static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static struct sbi_hart_boot_data boot_data[NR_CPUS]; static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) @@ -67,7 +67,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) unsigned long boot_addr = __pa_symbol(secondary_start_sbi); unsigned long hartid = cpuid_to_hartid_map(cpuid); unsigned long hsm_data; - struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + struct sbi_hart_boot_data *bdata = &boot_data[cpuid]; /* Make sure tidle is updated */ smp_mb(); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f7c9a1caa83e..14888e5ea19a 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -50,6 +50,7 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; +EXPORT_SYMBOL_GPL(boot_cpu_hartid); /* * Place kernel memory regions on the resource tree so that diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index dd8e4af6583f..93043924fe6c 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -454,7 +454,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) + if (copy_from_user(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -555,7 +555,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user_nofault((u8 __user *)addr, &val, len)) + if (copy_to_user((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 7c15b0f4ee3b..c29ef12a63bb 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -30,7 +30,7 @@ SECTIONS *(.data 
.data.* .gnu.linkonce.d.*) *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) - } + } :text .note : { *(.note.*) } :text :note diff --git a/arch/riscv/kernel/vendor_extensions/sifive.c b/arch/riscv/kernel/vendor_extensions/sifive.c index 1411337dc1e6..8fcf67e8c07f 100644 --- a/arch/riscv/kernel/vendor_extensions/sifive.c +++ b/arch/riscv/kernel/vendor_extensions/sifive.c @@ -8,7 +8,7 @@ #include <linux/types.h> /* All SiFive vendor extensions supported in Linux */ -const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { +static const struct riscv_isa_ext_data riscv_isa_vendor_ext_sifive[] = { __RISCV_ISA_EXT_DATA(xsfvfnrclipxfqf, RISCV_ISA_VENDOR_EXT_XSFVFNRCLIPXFQF), __RISCV_ISA_EXT_DATA(xsfvfwmaccqqq, RISCV_ISA_VENDOR_EXT_XSFVFWMACCQQQ), __RISCV_ISA_EXT_DATA(xsfvqmaccdod, RISCV_ISA_VENDOR_EXT_XSFVQMACCDOD), diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 5fbf3f94f1e8..b17fad091bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, @@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, cp->a4); else @@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, 
the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. */ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index d229cbd2ba22..9b0d55be1239 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -38,6 +38,7 @@ static int s390_sha1_init(struct shash_desc *desc) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } @@ -60,6 +61,7 @@ static int s390_sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_1; + sctx->first_message_part = 0; return 0; } diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 33711a29618c..6cbbf5e8555f 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -32,6 +32,7 @@ static int sha512_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } @@ -57,6 +58,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); sctx->func = CPACF_KIMD_SHA_512; + sctx->first_message_part = 0; return 0; } @@ -97,6 +99,7 @@ static int sha384_init(struct shash_desc *desc) ctx->count = 0; ctx->sha512.count_hi = 0; ctx->func = CPACF_KIMD_SHA_512; + ctx->first_message_part = 0; return 0; } diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62c0ab4a4b9d..0905fa99a31e 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -265,7 +265,7 @@ static __always_inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *r addr = kernel_stack_pointer(regs) + n * sizeof(long); if (!regs_within_kernel_stack(regs, addr)) return 0; - return 
READ_ONCE_NOCHECK(addr); + return READ_ONCE_NOCHECK(*(unsigned long *)addr); } /** diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2fbee3887d13..d930416d4c90 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -54,6 +54,7 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) case PCI_ERS_RESULT_CAN_RECOVER: case PCI_ERS_RESULT_RECOVERED: case PCI_ERS_RESULT_NEED_RESET: + case PCI_ERS_RESULT_NONE: return false; default: return true; @@ -78,10 +79,6 @@ static bool is_driver_supported(struct pci_driver *driver) return false; if (!driver->err_handler->error_detected) return false; - if (!driver->err_handler->slot_reset) - return false; - if (!driver->err_handler->resume) - return false; return true; } @@ -106,6 +103,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int rc; + /* The underlying device may have been disabled by the event */ + if (!zdev_enabled(zdev)) + return PCI_ERS_RESULT_NEED_RESET; + pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); rc = zpci_reset_load_store_blocked(zdev); if (rc) { @@ -114,16 +115,18 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, return PCI_ERS_RESULT_NEED_RESET; } - if (driver->err_handler->mmio_enabled) { + if (driver->err_handler->mmio_enabled) ers_res = driver->err_handler->mmio_enabled(pdev); - if (ers_result_indicates_abort(ers_res)) { - pr_info("%s: Automatic recovery failed after MMIO re-enable\n", - pci_name(pdev)); - return ers_res; - } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { - pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); - return ers_res; - } + else + ers_res = PCI_ERS_RESULT_NONE; + + if (ers_result_indicates_abort(ers_res)) { + pr_info("%s: Automatic recovery failed after MMIO re-enable\n", + pci_name(pdev)); + return ers_res; + } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { + pr_debug("%s: 
Driver needs reset to recover\n", pci_name(pdev)); + return ers_res; } pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); @@ -150,7 +153,12 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, return ers_res; } pdev->error_state = pci_channel_io_normal; - ers_res = driver->err_handler->slot_reset(pdev); + + if (driver->err_handler->slot_reset) + ers_res = driver->err_handler->slot_reset(pdev); + else + ers_res = PCI_ERS_RESULT_NONE; + if (ers_result_indicates_abort(ers_res)) { pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); return ers_res; @@ -214,7 +222,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) goto out_unlock; } - if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { + if (ers_res != PCI_ERS_RESULT_NEED_RESET) { ers_res = zpci_event_do_error_state_clear(pdev, driver); if (ers_result_indicates_abort(ers_res)) { status_str = "failed (abort on MMIO enable)"; @@ -225,6 +233,16 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res == PCI_ERS_RESULT_NEED_RESET) ers_res = zpci_event_do_reset(pdev, driver); + /* + * ers_res can be PCI_ERS_RESULT_NONE either because the driver + * decided to return it, indicating that it abstains from voting + * on how to recover, or because it didn't implement the callback. + * Both cases assume, that if there is nothing else causing a + * disconnect, we recovered successfully. 
+ */ + if (ers_res == PCI_ERS_RESULT_NONE) + ers_res = PCI_ERS_RESULT_RECOVERED; + if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); @@ -273,6 +291,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = NULL; pci_ers_result_t ers_res; + u32 fh = 0; + int rc; zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n", ccdf->fid, ccdf->fh, ccdf->pec); @@ -281,6 +301,15 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) if (zdev) { mutex_lock(&zdev->state_lock); + rc = clp_refresh_fh(zdev->fid, &fh); + if (rc) + goto no_pdev; + if (!fh || ccdf->fh != fh) { + /* Ignore events with stale handles */ + zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n", + ccdf->fid, fh, ccdf->fh); + goto no_pdev; + } zpci_update_fh(zdev, ccdf->fh); if (zdev->zbus->bus) pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c index c5e6545f6fcf..8e8a8bf518b6 100644 --- a/arch/um/drivers/ubd_user.c +++ b/arch/um/drivers/ubd_user.c @@ -41,7 +41,7 @@ int start_io_thread(struct os_helper_thread **td_out, int *fd_out) *fd_out = fds[1]; err = os_set_fd_block(*fd_out, 0); - err = os_set_fd_block(kernel_fd, 0); + err |= os_set_fd_block(kernel_fd, 0); if (err) { printk("start_io_thread - failed to set nonblocking I/O.\n"); goto out_close; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index f292e0b4ff8b..9bbbddfe866b 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1625,35 +1625,19 @@ static void vector_eth_configure( device->dev = dev; - *vp = ((struct vector_private) - { - .list = LIST_HEAD_INIT(vp->list), - .dev = dev, - .unit = n, - .options = get_transport_options(def), - .rx_irq = 0, - .tx_irq = 0, - .parsed = def, - .max_packet = get_mtu(def) + ETH_HEADER_OTHER, - /* TODO - we need to calculate headroom so that 
ip header - * is 16 byte aligned all the time - */ - .headroom = get_headroom(def), - .form_header = NULL, - .verify_header = NULL, - .header_rxbuffer = NULL, - .header_txbuffer = NULL, - .header_size = 0, - .rx_header_size = 0, - .rexmit_scheduled = false, - .opened = false, - .transport_data = NULL, - .in_write_poll = false, - .coalesce = 2, - .req_size = get_req_size(def), - .in_error = false, - .bpf = NULL - }); + INIT_LIST_HEAD(&vp->list); + vp->dev = dev; + vp->unit = n; + vp->options = get_transport_options(def); + vp->parsed = def; + vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER; + /* + * TODO - we need to calculate headroom so that ip header + * is 16 byte aligned all the time + */ + vp->headroom = get_headroom(def); + vp->coalesce = 2; + vp->req_size = get_req_size(def); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); INIT_WORK(&vp->reset_tx, vector_reset_tx); diff --git a/arch/um/drivers/vfio_kern.c b/arch/um/drivers/vfio_kern.c index b51fc9888ae1..13b971a2bd43 100644 --- a/arch/um/drivers/vfio_kern.c +++ b/arch/um/drivers/vfio_kern.c @@ -570,6 +570,17 @@ static void uml_vfio_release_device(struct uml_vfio_device *dev) kfree(dev); } +static struct uml_vfio_device *uml_vfio_find_device(const char *device) +{ + struct uml_vfio_device *dev; + + list_for_each_entry(dev, &uml_vfio_devices, list) { + if (!strcmp(dev->name, device)) + return dev; + } + return NULL; +} + static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp) { struct uml_vfio_device *dev; @@ -582,6 +593,9 @@ static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *k uml_vfio_container.fd = fd; } + if (uml_vfio_find_device(device)) + return -EEXIST; + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..8bed9030ad47 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,7 +89,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN 
select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE - select ARCH_HAS_EXECMEM_ROX if X86_64 + select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL @@ -147,7 +147,7 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_NO_INSTR select ARCH_WANT_GENERAL_HUGETLB - select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_HUGE_PMD_SHARE if X86_64 select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64 select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64 @@ -2695,6 +2695,15 @@ config MITIGATION_ITS disabled, mitigation cannot be enabled via cmdline. See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> +config MITIGATION_TSA + bool "Mitigate Transient Scheduler Attacks" + depends on CPU_SUP_AMD + default y + help + Enable mitigation for Transient Scheduler Attacks. TSA is a hardware + security vulnerability on AMD CPUs which can lead to forwarding of + invalid info to subsequent instructions and thus can affect their + timing and thereby cause a leakage. 
endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b6db4e0b936b..7543a8b52c67 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -88,7 +88,7 @@ static const char * const sev_status_feat_names[] = { */ static u64 snp_tsc_scale __ro_after_init; static u64 snp_tsc_offset __ro_after_init; -static u64 snp_tsc_freq_khz __ro_after_init; +static unsigned long snp_tsc_freq_khz __ro_after_init; DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); @@ -2167,15 +2167,31 @@ static unsigned long securetsc_get_tsc_khz(void) void __init snp_secure_tsc_init(void) { - unsigned long long tsc_freq_mhz; + struct snp_secrets_page *secrets; + unsigned long tsc_freq_mhz; + void *mem; if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) return; + mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); + if (!mem) { + pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + secrets = (__force struct snp_secrets_page *)mem; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); - snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ + tsc_freq_mhz &= GENMASK_ULL(17, 0); + + snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); x86_platform.calibrate_cpu = securetsc_get_tsc_khz; x86_platform.calibrate_tsc = securetsc_get_tsc_khz; + + early_memunmap(mem, PAGE_SIZE); } diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 175958b02f2b..8e9a0cc20a4a 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -36,20 +36,20 @@ EXPORT_SYMBOL_GPL(write_ibpb); /* * Define the VERW operand that is disguised as entry code so that - * it can be referenced with KPTI enabled. 
This ensure VERW can be + * it can be referenced with KPTI enabled. This ensures VERW can be * used late in exit-to-user path after page tables are switched. */ .pushsection .entry.text, "ax" .align L1_CACHE_BYTES, 0xcc -SYM_CODE_START_NOALIGN(mds_verw_sel) +SYM_CODE_START_NOALIGN(x86_verw_sel) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR .word __KERNEL_DS .align L1_CACHE_BYTES, 0xcc -SYM_CODE_END(mds_verw_sel); +SYM_CODE_END(x86_verw_sel); /* For KVM */ -EXPORT_SYMBOL_GPL(mds_verw_sel); +EXPORT_SYMBOL_GPL(x86_verw_sel); .popsection diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 741b229f0718..c2fb729c270e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event) * If the PEBS counters snapshotting is enabled, * the topdown event is available in PEBS records. */ - if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) static_call(intel_pmu_update_topdown_event)(event, NULL); else intel_pmu_drain_pebs_buffer(); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ee176236c2be..286d509f9363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -456,6 +456,7 @@ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ #define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ @@ -487,6 +488,9 @@ #define 
X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ #define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ +#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ +#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ /* * BUG word(s) @@ -542,5 +546,5 @@ #define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ - +#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 363110e6b2e3..a2c1f2d24b64 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -9,6 +9,14 @@ #include <asm/cpufeature.h> #include <asm/msr.h> +/* + * Define bits that are always set to 1 in DR7, only bit 10 is + * architecturally reserved to '1'. + * + * This is also the init/reset value for DR7. 
+ */ +#define DR7_FIXED_1 0x00000400 + DECLARE_PER_CPU(unsigned long, cpu_dr7); #ifndef CONFIG_PARAVIRT_XXL @@ -100,8 +108,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) static inline void hw_breakpoint_disable(void) { - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); + /* Reset the control register for HW Breakpoint */ + set_debugreg(DR7_FIXED_1, 7); /* Zero-out the individual HW breakpoint address registers */ set_debugreg(0UL, 0); @@ -125,9 +133,12 @@ static __always_inline unsigned long local_db_save(void) return 0; get_debugreg(dr7, 7); - dr7 &= ~0x400; /* architecturally set bit */ + + /* Architecturally set bit */ + dr7 &= ~DR7_FIXED_1; if (dr7) - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); + /* * Ensure the compiler doesn't lower the above statements into * the critical section; disabling breakpoints late would not diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9a9b21b78905..b30e5474c18e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -44,13 +44,13 @@ static __always_inline void native_irq_enable(void) static __always_inline void native_safe_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static __always_inline void native_halt(void) { - mds_idle_clear_cpu_buffers(); + x86_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4a391929cdb..f7af967aa16f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -31,6 +31,7 @@ #include <asm/apic.h> #include <asm/pvclock-abi.h> +#include <asm/debugreg.h> #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/msr-index.h> @@ -249,7 +250,6 @@ enum x86_intercept_stage; #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) -#define DR7_FIXED_1 
0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ @@ -700,8 +700,13 @@ struct kvm_vcpu_hv { struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; - /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + /* + * Preallocated buffers for handling hypercalls that pass sparse vCPU + * sets (for high vCPU counts, they're too large to comfortably fit on + * the stack). + */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct hv_vp_assist_page vp_assist_page; @@ -764,6 +769,7 @@ enum kvm_only_cpuid_leafs { CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, + CPUID_8000_0021_ECX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index e988bac0a4a1..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,12 +5,20 @@ #include <asm-generic/module.h> #include <asm/orc_types.h> +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b7dded3c8113..5cfb5d74dd5f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -628,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index dd2b129b0418..6ca6516c7492 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -43,8 +43,6 @@ static 
__always_inline void __monitorx(const void *eax, u32 ecx, u32 edx) static __always_inline void __mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); - /* * Use the instruction mnemonic with implicit operands, as the LLVM * assembler fails to assemble the mnemonic with explicit operands: @@ -80,7 +78,7 @@ static __always_inline void __mwait(u32 eax, u32 ecx) */ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) { - /* No MDS buffer clear as this is AMD/HYGON only */ + /* No need for TSA buffer clearing on AMD */ /* "mwaitx %eax, %ebx, %ecx" */ asm volatile(".byte 0x0f, 0x01, 0xfb" @@ -98,7 +96,6 @@ static __always_inline void __mwaitx(u32 eax, u32 ebx, u32 ecx) */ static __always_inline void __sti_mwait(u32 eax, u32 ecx) { - mds_idle_clear_cpu_buffers(); asm volatile("sti; mwait" :: "a" (eax), "c" (ecx)); } @@ -115,21 +112,29 @@ static __always_inline void __sti_mwait(u32 eax, u32 ecx) */ static __always_inline void mwait_idle_with_hints(u32 eax, u32 ecx) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { const void *addr = &current_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - if (ecx & 1) { - __mwait(eax, ecx); - } else { - __sti_mwait(eax, ecx); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + if (ecx & 1) { + __mwait(eax, ecx); + } else { + __sti_mwait(eax, ecx); + raw_local_irq_disable(); } } + +out: current_clr_polling(); } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 20d754b98f3f..10f261678749 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -302,25 +302,31 @@ .endm /* - * Macro to execute VERW instruction that mitigate transient data sampling - * attacks such as MDS. 
On affected systems a microcode update overloaded VERW - * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. - * + * Macro to execute VERW insns that mitigate transient data sampling + * attacks such as MDS or TSA. On affected systems a microcode update + * overloaded VERW insns to also clear the CPU buffers. VERW clobbers + * CFLAGS.ZF. * Note: Only the memory operand variant of VERW clears the CPU buffers. */ -.macro CLEAR_CPU_BUFFERS +.macro __CLEAR_CPU_BUFFERS feature #ifdef CONFIG_X86_64 - ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw x86_verw_sel(%rip)", \feature #else /* * In 32bit mode, the memory operand must be a %cs reference. The data * segments may not be usable (vm86 mode), and the stack segment may not * be flat (ESPFIX32). */ - ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF + ALTERNATIVE "", "verw %cs:x86_verw_sel", \feature #endif .endm +#define CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF + +#define VM_CLEAR_CPU_BUFFERS \ + __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM + #ifdef CONFIG_X86_64 .macro CLEAR_BRANCH_HISTORY ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP @@ -567,24 +573,24 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(cpu_buf_vm_clear); -extern u16 mds_verw_sel; +extern u16 x86_verw_sel; #include <asm/segment.h> /** - * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. 
*/ -static __always_inline void mds_clear_cpu_buffers(void) +static __always_inline void x86_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -601,14 +607,15 @@ static __always_inline void mds_clear_cpu_buffers(void) } /** - * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS + * and TSA vulnerabilities. * * Clear CPU buffers if the corresponding static key is enabled */ -static __always_inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void x86_idle_clear_cpu_buffers(void) { - if (static_branch_likely(&mds_idle_clear)) - mds_clear_cpu_buffers(); + if (static_branch_likely(&cpu_buf_idle_clear)) + x86_clear_cpu_buffers(); } #endif /* __ASSEMBLER__ */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 58e028d42e41..a631f7d7c0c0 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -223,6 +223,18 @@ struct snp_tsc_info_resp { u8 rsvd2[100]; } __packed; +/* + * Obtain the mean TSC frequency by decreasing the nominal TSC frequency with + * TSC_FACTOR as documented in the SNP Firmware ABI specification: + * + * GUEST_TSC_FREQ * (1 - (TSC_FACTOR * 0.00001)) + * + * which is equivalent to: + * + * GUEST_TSC_FREQ -= (GUEST_TSC_FREQ * TSC_FACTOR) / 100000; + */ +#define SNP_SCALE_TSC_FREQ(freq, factor) ((freq) - (freq) * (factor) / 100000) + struct snp_guest_req { void *req_buf; size_t req_sz; @@ -282,8 +294,11 @@ struct snp_secrets_page { u8 svsm_guest_vmpl; u8 rsvd3[3]; + /* The percentage decrease from nominal to mean TSC frequency. 
*/ + u32 tsc_factor; + /* Remainder of page */ - u8 rsvd4[3744]; + u8 rsvd4[3740]; } __packed; struct snp_msg_desc { diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2f3820342598..8bc074c8d7c6 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -72,6 +72,7 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_GET_QUOTE 0x10002 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL /* * TDG.VP.VMCALL Status Codes (returned in R10) @@ -80,6 +81,7 @@ #define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL #define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL #define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL /* * Bitmasks of exposed registers (with VMM). diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. 
+ */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. + */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 8b19294600c4..7ddef3a69866 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -106,7 +106,7 @@ void tdx_init(void); typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); -static inline u64 sc_retry(sc_func_t func, u64 fn, +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { int retry = RDRAND_RETRY_LOOPS; diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index 0007ba077c0c..41da492dfb01 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -15,7 +15,26 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ -/* Define reserved bits in DR6 which are always set to 1 */ +/* + * Define bits in DR6 which are set to 1 by default. + * + * This is also the DR6 architectural value following Power-up, Reset or INIT. + * + * Note, with the introduction of Bus Lock Detection (BLD) and Restricted + * Transactional Memory (RTM), the DR6 register has been modified: + * + * 1) BLD flag (bit 11) is no longer reserved to 1 if the CPU supports + * Bus Lock Detection. The assertion of a bus lock could clear it. + * + * 2) RTM flag (bit 16) is no longer reserved to 1 if the CPU supports + * restricted transactional memory. #DB occurred inside an RTM region + * could clear it. + * + * Apparently, DR6.BLD and DR6.RTM are active low bits. 
+ * + * As a result, DR6_RESERVED is an incorrect name now, but it is kept for + * compatibility. + */ #define DR6_RESERVED (0xFFFF0FF0) #define DR_TRAP0 (0x1) /* db0 */ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecfe7b497cad..ea1d984166cd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,24 @@ static struct module *its_mod; #endif static void *its_page; static unsigned int its_offset; +struct its_array its_pages; + +static void *__its_alloc(struct its_array *pages) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + if (!page) + return NULL; + + void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + pages->pages = tmp; + pages->pages[pages->num++] = page; + + return no_free_ptr(page); +} /* Initialize a thunk with the "jmp *reg; int3" instructions. 
*/ static void *its_init_thunk(void *thunk, int reg) @@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +static void its_pages_protect(struct its_array *pages) +{ + for (int i = 0; i < pages->num; i++) { + void *page = pages->pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +static void its_fini_core(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + its_pages_protect(&its_pages); + kfree(its_pages.pages); +} + #ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { @@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; - execmem_restore_rox(page, PAGE_SIZE); - } + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + its_pages_protect(&mod->arch.its_pages); } void its_free_mod(struct module *mod) @@ -184,37 +215,33 @@ void its_free_mod(struct module *mod) if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) return; - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_free(page); } - kfree(mod->its_page_array); + kfree(mod->arch.its_pages.pages); } #endif /* CONFIG_MODULES */ static void *its_alloc(void) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); - - if (!page) - return NULL; + struct its_array *pages = &its_pages; + void *page; #ifdef CONFIG_MODULES - if (its_mod) { - void *tmp = krealloc(its_mod->its_page_array, - (its_mod->its_num_pages+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; + if (its_mod) + pages = &its_mod->arch.its_pages; +#endif - its_mod->its_page_array = tmp; - its_mod->its_page_array[its_mod->its_num_pages++] = page; + page = __its_alloc(pages); + if (!page) + return NULL; - execmem_make_temp_rw(page, PAGE_SIZE); - } -#endif /* CONFIG_MODULES */ + 
execmem_make_temp_rw(page, PAGE_SIZE); + if (pages == &its_pages) + set_memory_x((unsigned long)page, 1); - return no_free_ptr(page); + return page; } static void *its_allocate_thunk(int reg) @@ -268,7 +295,9 @@ u8 *its_static_thunk(int reg) return thunk; } -#endif +#else +static inline void its_fini_core(void) {} +#endif /* CONFIG_MITIGATION_ITS */ /* * Nomenclature for variable names to simplify and clarify this code and ease @@ -2338,6 +2367,8 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); + its_fini_core(); + /* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement. @@ -3107,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c */ void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate) { - __smp_text_poke_batch_add(addr, opcode, len, emulate); + smp_text_poke_batch_add(addr, opcode, len, emulate); smp_text_poke_batch_finish(); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..329ee185d8cc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,7 @@ #include <linux/sched/clock.h> #include <linux/random.h> #include <linux/topology.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include <asm/processor.h> #include <asm/apic.h> #include <asm/cacheinfo.h> @@ -31,7 +31,7 @@ #include "cpu.h" -u16 invlpgb_count_max __ro_after_init; +u16 invlpgb_count_max __ro_after_init = 1; static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { @@ -377,6 +377,47 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) #endif } +#define ZEN_MODEL_STEP_UCODE(fam, model, step, ucode) \ + X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, fam, model), \ + step, step, ucode) + +static const struct x86_cpu_id amd_tsa_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x1, 
0x0a0011d7), + ZEN_MODEL_STEP_UCODE(0x19, 0x01, 0x2, 0x0a00123b), + ZEN_MODEL_STEP_UCODE(0x19, 0x08, 0x2, 0x0a00820d), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x1, 0x0a10114c), + ZEN_MODEL_STEP_UCODE(0x19, 0x11, 0x2, 0x0a10124c), + ZEN_MODEL_STEP_UCODE(0x19, 0x18, 0x1, 0x0a108109), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x0, 0x0a20102e), + ZEN_MODEL_STEP_UCODE(0x19, 0x21, 0x2, 0x0a201211), + ZEN_MODEL_STEP_UCODE(0x19, 0x44, 0x1, 0x0a404108), + ZEN_MODEL_STEP_UCODE(0x19, 0x50, 0x0, 0x0a500012), + ZEN_MODEL_STEP_UCODE(0x19, 0x61, 0x2, 0x0a60120a), + ZEN_MODEL_STEP_UCODE(0x19, 0x74, 0x1, 0x0a704108), + ZEN_MODEL_STEP_UCODE(0x19, 0x75, 0x2, 0x0a705208), + ZEN_MODEL_STEP_UCODE(0x19, 0x78, 0x0, 0x0a708008), + ZEN_MODEL_STEP_UCODE(0x19, 0x7c, 0x0, 0x0a70c008), + ZEN_MODEL_STEP_UCODE(0x19, 0xa0, 0x2, 0x0aa00216), + {}, +}; + +static void tsa_init(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return; + + if (cpu_has(c, X86_FEATURE_ZEN3) || + cpu_has(c, X86_FEATURE_ZEN4)) { + if (x86_match_min_microcode_rev(amd_tsa_microcode)) + setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR); + else + pr_debug("%s: current revision: 0x%x\n", __func__, c->microcode); + } else { + setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO); + setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO); + } +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -489,6 +530,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) } bsp_determine_snp(c); + + tsa_init(c); + return; warn: @@ -930,6 +974,16 @@ static void init_amd_zen2(struct cpuinfo_x86 *c) init_spectral_chicken(c); fix_erratum_1386(c); zen2_zenbleed_check(c); + + /* Disable RDSEED on AMD Cyan Skillfish because of an error. */ + if (c->x86_model == 0x47 && c->x86_stepping == 0x0) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg("RDSEED is not reliable on this platform; disabling.\n"); + } + + /* Correct misconfigured CPUID on some clients. 
*/ + clear_cpu_cap(c, X86_FEATURE_INVLPGB); } static void init_amd_zen3(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f94e6a5497d..f4d3abb12317 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,6 +94,8 @@ static void __init bhi_apply_mitigation(void); static void __init its_select_mitigation(void); static void __init its_update_mitigation(void); static void __init its_apply_mitigation(void); +static void __init tsa_select_mitigation(void); +static void __init tsa_apply_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -169,9 +171,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DEFINE_STATIC_KEY_FALSE(switch_vcpu_ibpb); EXPORT_SYMBOL_GPL(switch_vcpu_ibpb); -/* Control MDS CPU buffer clear before idling (halt, mwait) */ -DEFINE_STATIC_KEY_FALSE(mds_idle_clear); -EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Control CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear); +EXPORT_SYMBOL_GPL(cpu_buf_idle_clear); /* * Controls whether l1d flush based mitigations are enabled, @@ -225,6 +227,7 @@ void __init cpu_select_mitigations(void) gds_select_mitigation(); its_select_mitigation(); bhi_select_mitigation(); + tsa_select_mitigation(); /* * After mitigations are selected, some may need to update their @@ -272,6 +275,7 @@ void __init cpu_select_mitigations(void) gds_apply_mitigation(); its_apply_mitigation(); bhi_apply_mitigation(); + tsa_apply_mitigation(); } /* @@ -637,7 +641,7 @@ static void __init mmio_apply_mitigation(void) * is required irrespective of SMT state. 
*/ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); if (mmio_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -1488,6 +1492,94 @@ static void __init its_apply_mitigation(void) } #undef pr_fmt +#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt + +enum tsa_mitigations { + TSA_MITIGATION_NONE, + TSA_MITIGATION_AUTO, + TSA_MITIGATION_UCODE_NEEDED, + TSA_MITIGATION_USER_KERNEL, + TSA_MITIGATION_VM, + TSA_MITIGATION_FULL, +}; + +static const char * const tsa_strings[] = { + [TSA_MITIGATION_NONE] = "Vulnerable", + [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary", + [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM", + [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", +}; + +static enum tsa_mitigations tsa_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_AUTO : TSA_MITIGATION_NONE; + +static int __init tsa_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + tsa_mitigation = TSA_MITIGATION_NONE; + else if (!strcmp(str, "on")) + tsa_mitigation = TSA_MITIGATION_FULL; + else if (!strcmp(str, "user")) + tsa_mitigation = TSA_MITIGATION_USER_KERNEL; + else if (!strcmp(str, "vm")) + tsa_mitigation = TSA_MITIGATION_VM; + else + pr_err("Ignoring unknown tsa=%s option.\n", str); + + return 0; +} +early_param("tsa", tsa_parse_cmdline); + +static void __init tsa_select_mitigation(void) +{ + if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) { + tsa_mitigation = TSA_MITIGATION_NONE; + return; + } + + if (tsa_mitigation == TSA_MITIGATION_NONE) + return; + + if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR)) { + tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED; + goto out; + } + + if (tsa_mitigation == TSA_MITIGATION_AUTO) + tsa_mitigation = TSA_MITIGATION_FULL; + + /* + * No need to set 
verw_clear_cpu_buf_mitigation_selected - it + * doesn't fit all cases here and it is not needed because this + * is the only VERW-based mitigation on AMD. + */ +out: + pr_info("%s\n", tsa_strings[tsa_mitigation]); +} + +static void __init tsa_apply_mitigation(void) +{ + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + break; + case TSA_MITIGATION_VM: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + case TSA_MITIGATION_FULL: + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM); + break; + default: + break; + } +} + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = @@ -2249,10 +2341,10 @@ static void update_mds_branch_idle(void) return; if (sched_smt_active()) { - static_branch_enable(&mds_idle_clear); + static_branch_enable(&cpu_buf_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { - static_branch_disable(&mds_idle_clear); + static_branch_disable(&cpu_buf_idle_clear); } } @@ -2316,6 +2408,25 @@ void cpu_bugs_smt_update(void) break; } + switch (tsa_mitigation) { + case TSA_MITIGATION_USER_KERNEL: + case TSA_MITIGATION_VM: + case TSA_MITIGATION_AUTO: + case TSA_MITIGATION_FULL: + /* + * TSA-SQ can potentially lead to info leakage between + * SMT threads. 
+ */ + if (sched_smt_active()) + static_branch_enable(&cpu_buf_idle_clear); + else + static_branch_disable(&cpu_buf_idle_clear); + break; + case TSA_MITIGATION_NONE: + case TSA_MITIGATION_UCODE_NEEDED: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -3265,6 +3376,11 @@ static ssize_t gds_show_state(char *buf) return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); } +static ssize_t tsa_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -3328,6 +3444,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITS: return its_show_state(buf); + case X86_BUG_TSA: + return tsa_show_state(buf); + default: break; } @@ -3414,6 +3533,11 @@ ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_att { return cpu_show_common(dev, attr, buf, X86_BUG_ITS); } + +ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TSA); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8feb8fd2957a..fb50c1dd53ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1233,6 +1233,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define ITS BIT(8) /* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ #define ITS_NATIVE_ONLY BIT(9) +/* CPU is affected by Transient Scheduler Attacks */ +#define TSA BIT(10) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1280,7 +1282,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x16, RETBLEED), VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO), - 
VULNBL_AMD(0x19, SRSO), + VULNBL_AMD(0x19, SRSO | TSA), VULNBL_AMD(0x1a, SRSO), {} }; @@ -1530,6 +1532,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); } + if (c->x86_vendor == X86_VENDOR_AMD) { + if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) || + !cpu_has(c, X86_FEATURE_TSA_L1_NO)) { + if (cpu_matches(cpu_vuln_blacklist, TSA) || + /* Enable bug on Zen guests to allow for live migration. */ + (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN))) + setup_force_cpu_bug(X86_BUG_TSA); + } + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -2243,20 +2255,16 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); #endif #endif -/* - * Clear all 6 debug registers: - */ -static void clear_all_debug_regs(void) +static void initialize_debug_regs(void) { - int i; - - for (i = 0; i < 8; i++) { - /* Ignore db4, db5 */ - if ((i == 4) || (i == 5)) - continue; - - set_debugreg(0, i); - } + /* Control register first -- to make sure everything is disabled. 
*/ + set_debugreg(DR7_FIXED_1, 7); + set_debugreg(DR6_RESERVED, 6); + /* dr5 and dr4 don't exist */ + set_debugreg(0, 3); + set_debugreg(0, 2); + set_debugreg(0, 1); + set_debugreg(0, 0); } #ifdef CONFIG_KGDB @@ -2417,7 +2425,7 @@ void cpu_init(void) load_mm_ldt(&init_mm); - clear_all_debug_regs(); + initialize_debug_regs(); dbg_restore_debug_regs(); doublefault_init_cpu_tss(); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9d852c3b2cb5..5c4eb28c3ac9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) struct thresh_restart { struct threshold_block *b; - int reset; int set_lvt_off; int lvt_off; u16 old_limit; @@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr) rdmsr(tr->b->address, lo, hi); - if (tr->b->threshold_limit < (hi & THRESHOLD_MAX)) - tr->reset = 1; /* limit cannot be lower than err count */ - - if (tr->reset) { /* reset err count and overflow bit */ - hi = - (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - tr->b->threshold_limit); + /* + * Reset error count and overflow bit. + * This is done during init or after handling an interrupt. 
+ */ + if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) { + hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI); + hi |= THRESHOLD_MAX - tr->b->threshold_limit; } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (hi & THRESHOLD_MAX) + (tr->old_limit - tr->b->threshold_limit); @@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol } bank_type = smca_get_bank_type(cpu, bank); - if (bank_type >= N_SMCA_BANK_TYPES) - return NULL; if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; - return NULL; + } + + if (b && b->block) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block); + return buf_mcatype; + } + + if (bank_type >= N_SMCA_BANK_TYPES) { + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank); + return buf_mcatype; } if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index e9b3c5d4a52e..4da4eab56c81 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void) void (*mc_poll_banks)(void) = mc_poll_banks_default; +static bool should_enable_timer(unsigned long iv) +{ + return !mca_cfg.ignore_ce && iv; +} + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t) if (mce_get_storm_mode()) { __start_timer(t, HZ); - } else { + } else if (should_enable_timer(iv)) { __this_cpu_write(mce_next_interval, iv); __start_timer(t, iv); } @@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; - if (mca_cfg.ignore_ce || !iv) - return; - - this_cpu_write(mce_next_interval, iv); - __start_timer(t, iv); + if (should_enable_timer(iv)) { + 
this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); + } } static void __mcheck_cpu_setup_timer(void) @@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { struct timer_list *t = this_cpu_ptr(&mce_timer); - int ret; mce_device_create(cpu); - - ret = mce_threshold_create_device(cpu); - if (ret) { - mce_device_remove(cpu); - return ret; - } + mce_threshold_create_device(cpu); mce_reenable_cpu(); mce_start_timer(t); return 0; diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index efcf21e9552e..9b149b9c4109 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) void mce_intel_feature_clear(struct cpuinfo_x86 *c) { intel_clear_lmce(); + cmci_clear(); } bool intel_filter_mce(struct mce *m) diff --git a/arch/x86/kernel/cpu/microcode/amd_shas.c b/arch/x86/kernel/cpu/microcode/amd_shas.c index 2a1655b1fdd8..1fd349cfc802 100644 --- a/arch/x86/kernel/cpu/microcode/amd_shas.c +++ b/arch/x86/kernel/cpu/microcode/amd_shas.c @@ -231,6 +231,13 @@ static const struct patch_digest phashes[] = { 0x0d,0x5b,0x65,0x34,0x69,0xb2,0x62,0x21, } }, + { 0xa0011d7, { + 0x35,0x07,0xcd,0x40,0x94,0xbc,0x81,0x6b, + 0xfc,0x61,0x56,0x1a,0xe2,0xdb,0x96,0x12, + 0x1c,0x1c,0x31,0xb1,0x02,0x6f,0xe5,0xd2, + 0xfe,0x1b,0x04,0x03,0x2c,0x8f,0x4c,0x36, + } + }, { 0xa001223, { 0xfb,0x32,0x5f,0xc6,0x83,0x4f,0x8c,0xb8, 0xa4,0x05,0xf9,0x71,0x53,0x01,0x16,0xc4, @@ -294,6 +301,13 @@ static const struct patch_digest phashes[] = { 0xc0,0xcd,0x33,0xf2,0x8d,0xf9,0xef,0x59, } }, + { 0xa00123b, { + 0xef,0xa1,0x1e,0x71,0xf1,0xc3,0x2c,0xe2, + 0xc3,0xef,0x69,0x41,0x7a,0x54,0xca,0xc3, + 0x8f,0x62,0x84,0xee,0xc2,0x39,0xd9,0x28, + 0x95,0xa7,0x12,0x49,0x1e,0x30,0x71,0x72, + } + }, { 0xa00820c, { 0xa8,0x0c,0x81,0xc0,0xa6,0x00,0xe7,0xf3, 0x5f,0x65,0xd3,0xb9,0x6f,0xea,0x93,0x63, @@ -301,6 +315,13 @@ static const struct patch_digest 
phashes[] = { 0xe1,0x3b,0x8d,0xb2,0xf8,0x22,0x03,0xe2, } }, + { 0xa00820d, { + 0xf9,0x2a,0xc0,0xf4,0x9e,0xa4,0x87,0xa4, + 0x7d,0x87,0x00,0xfd,0xab,0xda,0x19,0xca, + 0x26,0x51,0x32,0xc1,0x57,0x91,0xdf,0xc1, + 0x05,0xeb,0x01,0x7c,0x5a,0x95,0x21,0xb7, + } + }, { 0xa10113e, { 0x05,0x3c,0x66,0xd7,0xa9,0x5a,0x33,0x10, 0x1b,0xf8,0x9c,0x8f,0xed,0xfc,0xa7,0xa0, @@ -322,6 +343,13 @@ static const struct patch_digest phashes[] = { 0xf1,0x5e,0xb0,0xde,0xb4,0x98,0xae,0xc4, } }, + { 0xa10114c, { + 0x9e,0xb6,0xa2,0xd9,0x87,0x38,0xc5,0x64, + 0xd8,0x88,0xfa,0x78,0x98,0xf9,0x6f,0x74, + 0x39,0x90,0x1b,0xa5,0xcf,0x5e,0xb4,0x2a, + 0x02,0xff,0xd4,0x8c,0x71,0x8b,0xe2,0xc0, + } + }, { 0xa10123e, { 0x03,0xb9,0x2c,0x76,0x48,0x93,0xc9,0x18, 0xfb,0x56,0xfd,0xf7,0xe2,0x1d,0xca,0x4d, @@ -343,6 +371,13 @@ static const struct patch_digest phashes[] = { 0x1b,0x7d,0x64,0x9d,0x4b,0x53,0x13,0x75, } }, + { 0xa10124c, { + 0x29,0xea,0xf1,0x2c,0xb2,0xe4,0xef,0x90, + 0xa4,0xcd,0x1d,0x86,0x97,0x17,0x61,0x46, + 0xfc,0x22,0xcb,0x57,0x75,0x19,0xc8,0xcc, + 0x0c,0xf5,0xbc,0xac,0x81,0x9d,0x9a,0xd2, + } + }, { 0xa108108, { 0xed,0xc2,0xec,0xa1,0x15,0xc6,0x65,0xe9, 0xd0,0xef,0x39,0xaa,0x7f,0x55,0x06,0xc6, @@ -350,6 +385,13 @@ static const struct patch_digest phashes[] = { 0x28,0x1e,0x9c,0x59,0x69,0x99,0x4d,0x16, } }, + { 0xa108109, { + 0x85,0xb4,0xbd,0x7c,0x49,0xa7,0xbd,0xfa, + 0x49,0x36,0x80,0x81,0xc5,0xb7,0x39,0x1b, + 0x9a,0xaa,0x50,0xde,0x9b,0xe9,0x32,0x35, + 0x42,0x7e,0x51,0x4f,0x52,0x2c,0x28,0x59, + } + }, { 0xa20102d, { 0xf9,0x6e,0xf2,0x32,0xd3,0x0f,0x5f,0x11, 0x59,0xa1,0xfe,0xcc,0xcd,0x9b,0x42,0x89, @@ -357,6 +399,13 @@ static const struct patch_digest phashes[] = { 0x8c,0xe9,0x19,0x3e,0xcc,0x3f,0x7b,0xb4, } }, + { 0xa20102e, { + 0xbe,0x1f,0x32,0x04,0x0d,0x3c,0x9c,0xdd, + 0xe1,0xa4,0xbf,0x76,0x3a,0xec,0xc2,0xf6, + 0x11,0x00,0xa7,0xaf,0x0f,0xe5,0x02,0xc5, + 0x54,0x3a,0x1f,0x8c,0x16,0xb5,0xff,0xbe, + } + }, { 0xa201210, { 0xe8,0x6d,0x51,0x6a,0x8e,0x72,0xf3,0xfe, 0x6e,0x16,0xbc,0x62,0x59,0x40,0x17,0xe9, @@ 
-364,6 +413,13 @@ static const struct patch_digest phashes[] = { 0xf7,0x55,0xf0,0x13,0xbb,0x22,0xf6,0x41, } }, + { 0xa201211, { + 0x69,0xa1,0x17,0xec,0xd0,0xf6,0x6c,0x95, + 0xe2,0x1e,0xc5,0x59,0x1a,0x52,0x0a,0x27, + 0xc4,0xed,0xd5,0x59,0x1f,0xbf,0x00,0xff, + 0x08,0x88,0xb5,0xe1,0x12,0xb6,0xcc,0x27, + } + }, { 0xa404107, { 0xbb,0x04,0x4e,0x47,0xdd,0x5e,0x26,0x45, 0x1a,0xc9,0x56,0x24,0xa4,0x4c,0x82,0xb0, @@ -371,6 +427,13 @@ static const struct patch_digest phashes[] = { 0x13,0xbc,0xc5,0x25,0xe4,0xc5,0xc3,0x99, } }, + { 0xa404108, { + 0x69,0x67,0x43,0x06,0xf8,0x0c,0x62,0xdc, + 0xa4,0x21,0x30,0x4f,0x0f,0x21,0x2c,0xcb, + 0xcc,0x37,0xf1,0x1c,0xc3,0xf8,0x2f,0x19, + 0xdf,0x53,0x53,0x46,0xb1,0x15,0xea,0x00, + } + }, { 0xa500011, { 0x23,0x3d,0x70,0x7d,0x03,0xc3,0xc4,0xf4, 0x2b,0x82,0xc6,0x05,0xda,0x80,0x0a,0xf1, @@ -378,6 +441,13 @@ static const struct patch_digest phashes[] = { 0x11,0x5e,0x96,0x7e,0x71,0xe9,0xfc,0x74, } }, + { 0xa500012, { + 0xeb,0x74,0x0d,0x47,0xa1,0x8e,0x09,0xe4, + 0x93,0x4c,0xad,0x03,0x32,0x4c,0x38,0x16, + 0x10,0x39,0xdd,0x06,0xaa,0xce,0xd6,0x0f, + 0x62,0x83,0x9d,0x8e,0x64,0x55,0xbe,0x63, + } + }, { 0xa601209, { 0x66,0x48,0xd4,0x09,0x05,0xcb,0x29,0x32, 0x66,0xb7,0x9a,0x76,0xcd,0x11,0xf3,0x30, @@ -385,6 +455,13 @@ static const struct patch_digest phashes[] = { 0xe8,0x73,0xe2,0xd6,0xdb,0xd2,0x77,0x1d, } }, + { 0xa60120a, { + 0x0c,0x8b,0x3d,0xfd,0x52,0x52,0x85,0x7d, + 0x20,0x3a,0xe1,0x7e,0xa4,0x21,0x3b,0x7b, + 0x17,0x86,0xae,0xac,0x13,0xb8,0x63,0x9d, + 0x06,0x01,0xd0,0xa0,0x51,0x9a,0x91,0x2c, + } + }, { 0xa704107, { 0xf3,0xc6,0x58,0x26,0xee,0xac,0x3f,0xd6, 0xce,0xa1,0x72,0x47,0x3b,0xba,0x2b,0x93, @@ -392,6 +469,13 @@ static const struct patch_digest phashes[] = { 0x64,0x39,0x71,0x8c,0xce,0xe7,0x41,0x39, } }, + { 0xa704108, { + 0xd7,0x55,0x15,0x2b,0xfe,0xc4,0xbc,0x93, + 0xec,0x91,0xa0,0xae,0x45,0xb7,0xc3,0x98, + 0x4e,0xff,0x61,0x77,0x88,0xc2,0x70,0x49, + 0xe0,0x3a,0x1d,0x84,0x38,0x52,0xbf,0x5a, + } + }, { 0xa705206, { 
0x8d,0xc0,0x76,0xbd,0x58,0x9f,0x8f,0xa4, 0x12,0x9d,0x21,0xfb,0x48,0x21,0xbc,0xe7, @@ -399,6 +483,13 @@ static const struct patch_digest phashes[] = { 0x03,0x35,0xe9,0xbe,0xfb,0x06,0xdf,0xfc, } }, + { 0xa705208, { + 0x30,0x1d,0x55,0x24,0xbc,0x6b,0x5a,0x19, + 0x0c,0x7d,0x1d,0x74,0xaa,0xd1,0xeb,0xd2, + 0x16,0x62,0xf7,0x5b,0xe1,0x1f,0x18,0x11, + 0x5c,0xf0,0x94,0x90,0x26,0xec,0x69,0xff, + } + }, { 0xa708007, { 0x6b,0x76,0xcc,0x78,0xc5,0x8a,0xa3,0xe3, 0x32,0x2d,0x79,0xe4,0xc3,0x80,0xdb,0xb2, @@ -406,6 +497,13 @@ static const struct patch_digest phashes[] = { 0xdf,0x92,0x73,0x84,0x87,0x3c,0x73,0x93, } }, + { 0xa708008, { + 0x08,0x6e,0xf0,0x22,0x4b,0x8e,0xc4,0x46, + 0x58,0x34,0xe6,0x47,0xa2,0x28,0xfd,0xab, + 0x22,0x3d,0xdd,0xd8,0x52,0x9e,0x1d,0x16, + 0xfa,0x01,0x68,0x14,0x79,0x3e,0xe8,0x6b, + } + }, { 0xa70c005, { 0x88,0x5d,0xfb,0x79,0x64,0xd8,0x46,0x3b, 0x4a,0x83,0x8e,0x77,0x7e,0xcf,0xb3,0x0f, @@ -413,6 +511,13 @@ static const struct patch_digest phashes[] = { 0xee,0x49,0xac,0xe1,0x8b,0x13,0xc5,0x13, } }, + { 0xa70c008, { + 0x0f,0xdb,0x37,0xa1,0x10,0xaf,0xd4,0x21, + 0x94,0x0d,0xa4,0xa2,0xe9,0x86,0x6c,0x0e, + 0x85,0x7c,0x36,0x30,0xa3,0x3a,0x78,0x66, + 0x18,0x10,0x60,0x0d,0x78,0x3d,0x44,0xd0, + } + }, { 0xaa00116, { 0xe8,0x4c,0x2c,0x88,0xa1,0xac,0x24,0x63, 0x65,0xe5,0xaa,0x2d,0x16,0xa9,0xc3,0xf5, @@ -441,4 +546,11 @@ static const struct patch_digest phashes[] = { 0x68,0x2f,0x46,0xee,0xfe,0xc6,0x6d,0xef, } }, + { 0xaa00216, { + 0x79,0xfb,0x5b,0x9f,0xb6,0xe6,0xa8,0xf5, + 0x4e,0x7c,0x4f,0x8e,0x1d,0xad,0xd0,0x08, + 0xc2,0x43,0x7c,0x8b,0xe6,0xdb,0xd0,0xd2, + 0xe8,0x39,0x26,0xc1,0xe5,0x5a,0x48,0xf1, + } + }, }; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7109cbfcad4f..187d527ef73b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain 
*d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index dbf6d71bdf18..b4a1f6732a3a 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -50,6 +50,8 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, + { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, + { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 102641fd2172..8b1a9733d13e 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) struct perf_event *bp; /* Disable hardware debugging while we are in kgdb: */ - set_debugreg(0UL, 7); + set_debugreg(DR7_FIXED_1, 7); for (i = 0; i < HBP_NUM; i++) { if (!breakinfo[i].enabled) continue; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 704883c21f3a..a838be04f874 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -907,16 +907,24 @@ static __init bool prefer_mwait_c1_over_halt(void) */ static __cpuidle void 
mwait_idle(void) { + if (need_resched()) + return; + + x86_idle_clear_cpu_buffers(); + if (!current_set_polling_and_test()) { const void *addr = &current_thread_info()->flags; alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); __monitor(addr, 0, 0); - if (!need_resched()) { - __sti_mwait(0, 0); - raw_local_irq_disable(); - } + if (need_resched()) + goto out; + + __sti_mwait(0, 0); + raw_local_irq_disable(); } + +out: __current_clr_polling(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a10e180cbf23..3ef15c2f152f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -93,7 +93,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. */ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400)) + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1)) return; printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8d6cf25127aa..b972bf72fb8b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -133,7 +133,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode, /* Only print out debug registers if they are in their non-default state. 
*/ if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && - (d6 == DR6_RESERVED) && (d7 == 0x400))) { + (d6 == DR6_RESERVED) && (d7 == DR7_FIXED_1))) { printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n", log_lvl, d0, d1, d2); printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n", diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506..42bbc42bd350 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901..d483b585c6c6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c5c897a86418..36354b470590 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1022,24 +1022,32 @@ static bool is_sysenter_singlestep(struct pt_regs *regs) #endif } -static __always_inline unsigned long debug_read_clear_dr6(void) +static __always_inline 
unsigned long debug_read_reset_dr6(void) { unsigned long dr6; + get_debugreg(dr6, 6); + dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ + /* * The Intel SDM says: * - * Certain debug exceptions may clear bits 0-3. The remaining - * contents of the DR6 register are never cleared by the - * processor. To avoid confusion in identifying debug - * exceptions, debug handlers should clear the register before - * returning to the interrupted task. + * Certain debug exceptions may clear bits 0-3 of DR6. + * + * BLD induced #DB clears DR6.BLD and any other debug + * exception doesn't modify DR6.BLD. * - * Keep it simple: clear DR6 immediately. + * RTM induced #DB clears DR6.RTM and any other debug + * exception sets DR6.RTM. + * + * To avoid confusion in identifying debug exceptions, + * debug handlers should set DR6.BLD and DR6.RTM, and + * clear other DR6 bits before returning. + * + * Keep it simple: write DR6 with its architectural reset + * value 0xFFFF0FF0, defined as DR6_RESERVED, immediately. */ - get_debugreg(dr6, 6); set_debugreg(DR6_RESERVED, 6); - dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ return dr6; } @@ -1239,13 +1247,13 @@ out: /* IST stack entry */ DEFINE_IDTENTRY_DEBUG(exc_debug) { - exc_debug_kernel(regs, debug_read_clear_dr6()); + exc_debug_kernel(regs, debug_read_reset_dr6()); } /* User entry, runs on regular task stack */ DEFINE_IDTENTRY_DEBUG_USER(exc_debug) { - exc_debug_user(regs, debug_read_clear_dr6()); + exc_debug_user(regs, debug_read_reset_dr6()); } #ifdef CONFIG_X86_FRED @@ -1264,7 +1272,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) { /* * FRED #DB stores DR6 on the stack in the format which - * debug_read_clear_dr6() returns for the IDT entry points. + * debug_read_reset_dr6() returns for the IDT entry points. */ unsigned long dr6 = fred_event_data(regs); @@ -1279,7 +1287,7 @@ DEFINE_FREDENTRY_DEBUG(exc_debug) /* 32 bit does not have separate entry points. 
*/ DEFINE_IDTENTRY_RAW(exc_debug) { - unsigned long dr6 = debug_read_clear_dr6(); + unsigned long dr6 = debug_read_reset_dr6(); if (user_mode(regs)) exc_debug_user(regs, dr6); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b2d006756e02..f84bc0569c9c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1165,6 +1165,8 @@ void kvm_set_cpu_caps(void) */ SYNTHESIZED_F(LFENCE_RDTSC), /* SmmPgCfgLock */ + /* 4: Resv */ + SYNTHESIZED_F(VERW_CLEAR), F(NULL_SEL_CLR_BASE), /* UpperAddressIgnore */ F(AUTOIBRS), @@ -1179,6 +1181,11 @@ void kvm_set_cpu_caps(void) F(SRSO_USER_KERNEL_NO), ); + kvm_cpu_cap_init(CPUID_8000_0021_ECX, + SYNTHESIZED_F(TSA_SQ_NO), + SYNTHESIZED_F(TSA_L1_NO), + ); + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2), ); @@ -1748,8 +1755,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; case 0x80000021: - entry->ebx = entry->ecx = entry->edx = 0; + entry->ebx = entry->edx = 0; cpuid_entry_override(entry, CPUID_8000_0021_EAX); + cpuid_entry_override(entry, CPUID_8000_0021_ECX); break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 24f0318c50d7..ee27064dd72f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) goto out_flush_all; + if (is_noncanonical_invlpg_address(entries[i], vcpu)) + continue; + /* * Lower 12 bits of 'address' encode the number of additional * pages to flush. 
@@ -2001,11 +2004,11 @@ out_flush_all: static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + unsigned long *vcpu_mask = hv_vcpu->vcpu_mask; u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; /* * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index fde0ae986003..c53b92379e6e 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,6 +52,10 @@ /* CPUID level 0x80000022 (EAX) */ #define KVM_X86_FEATURE_PERFMON_V2 KVM_X86_FEATURE(CPUID_8000_0022_EAX, 0) +/* CPUID level 0x80000021 (ECX) */ +#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1) +#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -82,6 +86,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX}, [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_24_0_EBX] = { 0x24, 0, CPUID_EBX}, + [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX}, }; /* @@ -121,6 +126,8 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); + KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO); + KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 459c3b791fd4..b201f77fcd49 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) struct kvm_vcpu *src_vcpu; unsigned long i; + if (src->created_vcpus != atomic_read(&src->online_vcpus) || + 
dst->created_vcpus != atomic_read(&dst->online_vcpus)) + return -EBUSY; + if (!sev_es_guest(src)) return 0; @@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) * the VMSA will be NULL if this vCPU is the destination for intrahost * migration, and will be copied later. */ - if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa) - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (!svm->sev_es.snp_has_guest_vmsa) { + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + else + svm->vmcb->control.vmsa_pa = INVALID_PAGE; + } if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES)) svm->vmcb->control.allowed_sev_features = sev->vmsa_features | diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 0c61153b275f..235c4af6b692 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -169,6 +169,9 @@ SYM_FUNC_START(__svm_vcpu_run) #endif mov VCPU_RDI(%_ASM_DI), %_ASM_DI + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 3: vmrun %_ASM_AX 4: @@ -335,6 +338,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) mov SVM_current_vmcb(%rdi), %rax mov KVM_VMCB_pa(%rax), %rax + /* Clobbers EFLAGS.ZF */ + VM_CLEAR_CPU_BUFFERS + /* Enter guest mode */ 1: vmrun %rax 2: diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b952bc673271..f31ccdeb905b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -173,6 +173,9 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i tdx_clear_unsupported_cpuid(entry); } +#define TDVMCALLINFO_GET_QUOTE BIT(0) +#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1) + static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, struct kvm_tdx_capabilities *caps) { @@ -188,6 +191,10 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, caps->cpuid.nent = td_conf->num_cpuid_config; + caps->user_tdvmcallinfo_1_r11 = + TDVMCALLINFO_GET_QUOTE | + 
TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT; + for (i = 0; i < td_conf->num_cpuid_config; i++) td_init_cpuid_entry2(&caps->cpuid.entries[i], i); @@ -1212,11 +1219,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu) /* * Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires * userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE - * bit set. If not, the error code is not defined in GHCI for TDX, use - * TDVMCALL_STATUS_INVALID_OPERAND for this case. + * bit set. This is a base call so it should always be supported, but + * KVM has no way to ensure that userspace implements the GHCI correctly. + * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error + * to the guest. */ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { - ret = TDVMCALL_STATUS_INVALID_OPERAND; + ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; goto error; } @@ -1449,20 +1458,106 @@ error: return 1; } +static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret); + + /* + * For now, there is no TDVMCALL beyond GHCI base API supported by KVM + * directly without the support from userspace, just set the value + * returned from userspace. 
+ */ + tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11; + tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12; + tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13; + tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14; + + return 1; +} + static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu) { struct vcpu_tdx *tdx = to_tdx(vcpu); - if (tdx->vp_enter_args.r12) - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); - else { + switch (tdx->vp_enter_args.r12) { + case 0: tdx->vp_enter_args.r11 = 0; + tdx->vp_enter_args.r12 = 0; tdx->vp_enter_args.r13 = 0; tdx->vp_enter_args.r14 = 0; + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS); + return 1; + case 1: + vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12; + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO; + vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS; + vcpu->run->tdx.get_tdvmcall_info.r11 = 0; + vcpu->run->tdx.get_tdvmcall_info.r12 = 0; + vcpu->run->tdx.get_tdvmcall_info.r13 = 0; + vcpu->run->tdx.get_tdvmcall_info.r14 = 0; + vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info; + return 0; + default: + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; } +} + +static int tdx_complete_simple(struct kvm_vcpu *vcpu) +{ + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret); return 1; } +static int tdx_get_quote(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 gpa = tdx->vp_enter_args.r12; + u64 size = tdx->vp_enter_args.r13; + + /* The gpa of buffer must have shared bit set. 
*/ + if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE; + vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm)); + vcpu->run->tdx.get_quote.size = size; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + +static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 vector = tdx->vp_enter_args.r12; + + if (vector < 32 || vector > 255) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT; + vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.setup_event_notify.vector = vector; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1472,11 +1567,15 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_report_fatal_error(vcpu); case TDVMCALL_GET_TD_VM_CALL_INFO: return tdx_get_td_vm_call_info(vcpu); + case TDVMCALL_GET_QUOTE: + return tdx_get_quote(vcpu); + case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: + return tdx_setup_event_notify_interrupt(vcpu); default: break; } - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED); return 1; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..191a9ed0da22 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7291,7 +7291,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if 
(static_branch_unlikely(&cpu_buf_vm_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) - mds_clear_cpu_buffers(); + x86_clear_cpu_buffers(); vmx_disable_fb_clear(vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b58a74c1722d..357b9e3a6cef 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ - if (kvm_caps.has_tsc_control) + if (kvm_caps.has_tsc_control) { tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz, v->arch.l1_tsc_scaling_ratio); + tgt_tsc_khz = tgt_tsc_khz ? : 1; + } if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL, @@ -11035,7 +11037,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs && !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); set_debugreg(vcpu->arch.eff_db[0], 0); set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); @@ -11044,7 +11046,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { - set_debugreg(0, 7); + set_debugreg(DR7_FIXED_1, 7); } vcpu->arch.host_debugctl = get_debugctlmsr(); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b029bb29a16..5fa2cca43653 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) - return -EINVAL; + /* + * Don't check for the port being within range of max_evtchn_port(). + * Userspace can configure what ever targets it likes; events just won't + * be delivered if/while the target is invalid, just like userspace can + * configure MSIs which target non-existent APICs. 
+ * + * This allow on Live Migration and Live Update, the IRQ routing table + * can be restored *independently* of other things like creating vCPUs, + * without imposing an ordering dependency on userspace. In this + * particular case, the problematic ordering would be with setting the + * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096 + * instead of 1024 event channels. + */ /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 607d6a2e66e2..8a34fff6ab2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,7 +30,6 @@ #include <linux/initrd.h> #include <linux/cpumask.h> #include <linux/gfp.h> -#include <linux/execmem.h> #include <asm/asm.h> #include <asm/bios_ebda.h> @@ -749,8 +748,6 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee66fae9ebcc..fdb6cab524f0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,7 +34,6 @@ #include <linux/gfp.h> #include <linux/kcore.h> #include <linux/bootmem_info.h> -#include <linux/execmem.h> #include <asm/processor.h> #include <asm/bios_ebda.h> @@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 46edc11726b7..8834c76f91c9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, pgprot_t pgprot; int i = 0; + if (!cpu_feature_enabled(X86_FEATURE_PSE)) + return 0; + addr &= PMD_MASK; pte = pte_offset_kernel(pmd, addr); first = *pte; diff 
--git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 190299834011..c0c40b67524e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void) return; setup_force_cpu_cap(X86_FEATURE_PTI); + + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + pr_debug("PTI enabled, disabling INVLPGB\n"); + setup_clear_cpu_cap(X86_FEATURE_INVLPGB); + } } static int __init pti_parse_cmdline(char *arg) diff --git a/arch/x86/um/ptrace.c b/arch/x86/um/ptrace.c index 3275870330fe..fae8aabad10f 100644 --- a/arch/x86/um/ptrace.c +++ b/arch/x86/um/ptrace.c @@ -161,7 +161,7 @@ static int fpregs_legacy_set(struct task_struct *target, from = kbuf; } - return um_fxsr_from_i387(fxsave, &buf); + return um_fxsr_from_i387(fxsave, from); } #endif diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 2457d13c3f9e..c7a9a087ccaf 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err, args->r9, args->r10, args->r11); } -static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, - u64 fn, struct tdx_module_args *args) +static __always_inline int sc_retry_prerr(sc_func_t func, + sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) { u64 sret = sc_retry(func, fn, args); diff --git a/block/genhd.c b/block/genhd.c index 8171a6bc3210..c26733f6324b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -128,23 +128,27 @@ static void part_stat_read_all(struct block_device *part, static void bdev_count_inflight_rw(struct block_device *part, unsigned int inflight[2], bool mq_driver) { + int write = 0; + int read = 0; int cpu; if (mq_driver) { blk_mq_in_driver_rw(part, inflight); - } else { - for_each_possible_cpu(cpu) { - inflight[READ] += part_stat_local_read_cpu( - part, in_flight[READ], cpu); - inflight[WRITE] += part_stat_local_read_cpu( - part, in_flight[WRITE], cpu); - } + return; + } + + for_each_possible_cpu(cpu) { + 
read += part_stat_local_read_cpu(part, in_flight[READ], cpu); + write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu); } - if (WARN_ON_ONCE((int)inflight[READ] < 0)) - inflight[READ] = 0; - if (WARN_ON_ONCE((int)inflight[WRITE] < 0)) - inflight[WRITE] = 0; + /* + * While iterating all CPUs, some IOs may be issued from a CPU already + * traversed and complete on a CPU that has not yet been traversed, + * causing the inflight number to be negative. + */ + inflight[READ] = read > 0 ? read : 0; + inflight[WRITE] = write > 0 ? write : 0; } /** diff --git a/crypto/Kconfig b/crypto/Kconfig index e9fee7818e27..e1cfd0d4cc8f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -176,16 +176,33 @@ config CRYPTO_USER config CRYPTO_SELFTESTS bool "Enable cryptographic self-tests" - depends on DEBUG_KERNEL + depends on EXPERT help Enable the cryptographic self-tests. The cryptographic self-tests run at boot time, or at algorithm registration time if algorithms are dynamically loaded later. - This is primarily intended for developer use. It should not be - enabled in production kernels, unless you are trying to use these - tests to fulfill a FIPS testing requirement. + There are two main use cases for these tests: + + - Development and pre-release testing. In this case, also enable + CRYPTO_SELFTESTS_FULL to get the full set of tests. All crypto code + in the kernel is expected to pass the full set of tests. + + - Production kernels, to help prevent buggy drivers from being used + and/or meet FIPS 140-3 pre-operational testing requirements. In + this case, enable CRYPTO_SELFTESTS but not CRYPTO_SELFTESTS_FULL. + +config CRYPTO_SELFTESTS_FULL + bool "Enable the full set of cryptographic self-tests" + depends on CRYPTO_SELFTESTS + help + Enable the full set of cryptographic self-tests for each algorithm. + + The full set of tests should be enabled for development and + pre-release testing, but not in production kernels. 
+ + All crypto code in the kernel is expected to pass the full tests. config CRYPTO_NULL tristate "Null algorithms" diff --git a/crypto/ahash.c b/crypto/ahash.c index e10bc2659ae4..bc84a07c924c 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -600,12 +600,14 @@ static void ahash_def_finup_done2(void *data, int err) static int ahash_def_finup_finish1(struct ahash_request *req, int err) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + if (err) goto out; req->base.complete = ahash_def_finup_done2; - err = crypto_ahash_final(req); + err = crypto_ahash_alg(tfm)->final(req); if (err == -EINPROGRESS || err == -EBUSY) return err; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 72005074a5c2..32f753d6c430 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -45,6 +45,7 @@ static bool notests; module_param(notests, bool, 0644); MODULE_PARM_DESC(notests, "disable all crypto self-tests"); +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL static bool noslowtests; module_param(noslowtests, bool, 0644); MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); @@ -52,6 +53,10 @@ MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); static unsigned int fuzz_iterations = 100; module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); +#else +#define noslowtests 1 +#define fuzz_iterations 0 +#endif #ifndef CONFIG_CRYPTO_SELFTESTS @@ -319,9 +324,9 @@ struct testvec_config { /* * The following are the lists of testvec_configs to test for each algorithm - * type when the fast crypto self-tests are enabled. They aim to provide good - * test coverage, while keeping the test time much shorter than the full tests - * so that the fast tests can be used to fulfill FIPS 140 testing requirements. + * type when the "fast" crypto self-tests are enabled. 
They aim to provide good + * test coverage, while keeping the test time much shorter than the "full" tests + * so that the "fast" tests can be enabled in a wider range of circumstances. */ /* Configs for skciphers and aeads */ @@ -1183,14 +1188,18 @@ static void generate_random_testvec_config(struct rnd_state *rng, static void crypto_disable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL migrate_disable(); __this_cpu_write(crypto_simd_disabled_for_test, true); +#endif } static void crypto_reenable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL __this_cpu_write(crypto_simd_disabled_for_test, false); migrate_enable(); +#endif } /* diff --git a/crypto/wp512.c b/crypto/wp512.c index 41f13d490333..229b189a7988 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -21,10 +21,10 @@ */ #include <crypto/internal/hash.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/module.h> -#include <linux/mm.h> -#include <asm/byteorder.h> -#include <linux/types.h> +#include <linux/string.h> +#include <linux/unaligned.h> #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 @@ -37,9 +37,6 @@ struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; - u8 buffer[WP512_BLOCK_SIZE]; - int bufferBits; - int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; @@ -779,16 +776,16 @@ static const u64 rc[WHIRLPOOL_ROUNDS] = { * The core Whirlpool transform. 
*/ -static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { +static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx, + const u8 *buffer) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; - const __be64 *buffer = (const __be64 *)wctx->buffer; for (i = 0; i < 8; i++) - block[i] = be64_to_cpu(buffer[i]); + block[i] = get_unaligned_be64(buffer + i * 8); state[0] = block[0] ^ (K[0] = wctx->hash[0]); state[1] = block[1] ^ (K[1] = wctx->hash[1]); @@ -991,8 +988,6 @@ static int wp512_init(struct shash_desc *desc) { int i; memset(wctx->bitLength, 0, 32); - wctx->bufferBits = wctx->bufferPos = 0; - wctx->buffer[0] = 0; for (i = 0; i < 8; i++) { wctx->hash[i] = 0L; } @@ -1000,84 +995,54 @@ static int wp512_init(struct shash_desc *desc) { return 0; } -static int wp512_update(struct shash_desc *desc, const u8 *source, - unsigned int len) +static void wp512_add_length(u8 *bitLength, u64 value) { - struct wp512_ctx *wctx = shash_desc_ctx(desc); - int sourcePos = 0; - unsigned int bits_len = len * 8; // convert to number of bits - int sourceGap = (8 - ((int)bits_len & 7)) & 7; - int bufferRem = wctx->bufferBits & 7; + u32 carry; int i; - u32 b, carry; - u8 *buffer = wctx->buffer; - u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; - u64 value = bits_len; for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { carry += bitLength[i] + ((u32)value & 0xff); bitLength[i] = (u8)carry; carry >>= 8; value >>= 8; } - while (bits_len > 8) { - b = ((source[sourcePos] << sourceGap) & 0xff) | - ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); - buffer[bufferPos++] |= (u8)(b >> bufferRem); - bufferBits += 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += bufferRem; - bits_len -= 8; 
- sourcePos++; - } - if (bits_len > 0) { - b = (source[sourcePos] << sourceGap) & 0xff; - buffer[bufferPos] |= b >> bufferRem; - } else { - b = 0; - } - if (bufferRem + bits_len < 8) { - bufferBits += bits_len; - } else { - bufferPos++; - bufferBits += 8 - bufferRem; - bits_len -= 8 - bufferRem; - if (bufferBits == WP512_BLOCK_SIZE * 8) { - wp512_process_buffer(wctx); - bufferBits = bufferPos = 0; - } - buffer[bufferPos] = b << (8 - bufferRem); - bufferBits += (int)bits_len; - } +} - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; +static int wp512_update(struct shash_desc *desc, const u8 *source, + unsigned int len) +{ + struct wp512_ctx *wctx = shash_desc_ctx(desc); + unsigned int remain = len % WP512_BLOCK_SIZE; + u64 bits_len = (len - remain) * 8ull; + u8 *bitLength = wctx->bitLength; - return 0; + wp512_add_length(bitLength, bits_len); + do { + wp512_process_buffer(wctx, source); + source += WP512_BLOCK_SIZE; + bits_len -= WP512_BLOCK_SIZE * 8; + } while (bits_len); + + return remain; } -static int wp512_final(struct shash_desc *desc, u8 *out) +static int wp512_finup(struct shash_desc *desc, const u8 *src, + unsigned int bufferPos, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; - u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; - int bufferBits = wctx->bufferBits; - int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; + u8 buffer[WP512_BLOCK_SIZE]; - buffer[bufferPos] |= 0x80U >> (bufferBits & 7); + wp512_add_length(bitLength, bufferPos * 8); + memcpy(buffer, src, bufferPos); + buffer[bufferPos] = 0x80U; bufferPos++; if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { if (bufferPos < WP512_BLOCK_SIZE) memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); bufferPos = 0; } if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) @@ -1086,31 +1051,32 @@ static int wp512_final(struct shash_desc *desc, u8 *out) bufferPos = 
WP512_BLOCK_SIZE - WP512_LENGTHBYTES; memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); - wp512_process_buffer(wctx); + wp512_process_buffer(wctx, buffer); + memzero_explicit(buffer, sizeof(buffer)); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); - wctx->bufferBits = bufferBits; - wctx->bufferPos = bufferPos; return 0; } -static int wp384_final(struct shash_desc *desc, u8 *out) +static int wp384_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP384_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } -static int wp256_final(struct shash_desc *desc, u8 *out) +static int wp256_finup(struct shash_desc *desc, const u8 *src, + unsigned int len, u8 *out) { u8 D[64]; - wp512_final(desc, D); + wp512_finup(desc, src, len, D); memcpy(out, D, WP256_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); @@ -1121,11 +1087,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP512_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp512_final, + .finup = wp512_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp512", .cra_driver_name = "wp512-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1133,11 +1100,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP384_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp384_final, + .finup = wp384_finup, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp384", .cra_driver_name = "wp384-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } @@ -1145,11 +1113,12 @@ static struct shash_alg wp_algs[3] = { { .digestsize = WP256_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, - .final = wp256_final, + .finup = wp256_finup, 
.descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp256", .cra_driver_name = "wp256-generic", + .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index c8f37f4e6626..fef6fb29ece4 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -483,6 +483,13 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, return_ACPI_STATUS(AE_NULL_OBJECT); } + if (this_walk_state->num_operands < obj_desc->method.param_count) { + ACPI_ERROR((AE_INFO, "Missing argument for method [%4.4s]", + acpi_ut_get_node_name(method_node))); + + return_ACPI_STATUS(AE_AML_UNINITIALIZED_ARG); + } + /* Init for new method, possibly wait on method mutex */ status = diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 45593612a4db..6905b56bf3e4 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -243,23 +243,10 @@ static int acpi_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_POWER_NOW: - if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) { + if (battery->rate_now == ACPI_BATTERY_VALUE_UNKNOWN) ret = -ENODEV; - break; - } - - val->intval = battery->rate_now * 1000; - /* - * When discharging, the current should be reported as a - * negative number as per the power supply class interface - * definition. 
- */ - if (psp == POWER_SUPPLY_PROP_CURRENT_NOW && - (battery->state & ACPI_BATTERY_STATE_DISCHARGING) && - acpi_battery_handle_discharging(battery) - == POWER_SUPPLY_STATUS_DISCHARGING) - val->intval = -val->intval; - + else + val->intval = battery->rate_now * 1000; break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 163ac909bd06..aa93b0ecbbc6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1410,8 +1410,15 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) static bool ahci_broken_lpm(struct pci_dev *pdev) { + /* + * Platforms with LPM problems. + * If driver_data is NULL, there is no existing BIOS version with + * functioning LPM. + * If driver_data is non-NULL, then driver_data contains the DMI BIOS + * build date of the first BIOS version with functioning LPM (i.e. older + * BIOS versions have broken LPM). + */ static const struct dmi_system_id sysids[] = { - /* Various Lenovo 50 series have LPM issues with older BIOSen */ { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), @@ -1438,13 +1445,30 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"), }, + .driver_data = "20180409", /* 2.35 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUSPRO D840MB_M840SA"), + }, + /* 320 is broken, there is no known good version. */ + }, + { /* - * Note date based on release notes, 2.35 has been - * reported to be good, but I've been unable to get - * a hold of the reporter to get the DMI BIOS date. - * TODO: fix this. + * AMD 500 Series Chipset SATA Controller [1022:43eb] + * on this motherboard timeouts on ports 5 and 6 when + * LPM is enabled, at least with WDC WD20EFAX-68FB5N0 + * hard drives. LPM with the same drive works fine on + * all other ports on the same controller. 
*/ - .driver_data = "20180310", /* 2.35 */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, + "ROG STRIX B550-F GAMING (WI-FI)"), + }, + /* 3621 is broken, there is no known good version. */ }, { } /* terminate list */ }; @@ -1455,6 +1479,9 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) if (!dmi) return false; + if (!dmi->driver_data) + return true; + dmi_get_date(DMI_BIOS_DATE, &year, &month, &date); snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date); diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index b7f0bf795521..f2140fc06ba0 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -514,15 +514,19 @@ unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, EXPORT_SYMBOL_GPL(ata_acpi_gtm_xfermask); /** - * ata_acpi_cbl_80wire - Check for 80 wire cable + * ata_acpi_cbl_pata_type - Return PATA cable type * @ap: Port to check - * @gtm: GTM data to use * - * Return 1 if the @gtm indicates the BIOS selected an 80wire mode. 
+ * Return ATA_CBL_PATA* according to the transfer mode selected by BIOS */ -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) +int ata_acpi_cbl_pata_type(struct ata_port *ap) { struct ata_device *dev; + int ret = ATA_CBL_PATA_UNK; + const struct ata_acpi_gtm *gtm = ata_acpi_init_gtm(ap); + + if (!gtm) + return ATA_CBL_PATA40; ata_for_each_dev(dev, &ap->link, ENABLED) { unsigned int xfer_mask, udma_mask; @@ -530,13 +534,17 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) xfer_mask = ata_acpi_gtm_xfermask(dev, gtm); ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask); - if (udma_mask & ~ATA_UDMA_MASK_40C) - return 1; + ret = ATA_CBL_PATA40; + + if (udma_mask & ~ATA_UDMA_MASK_40C) { + ret = ATA_CBL_PATA80; + break; + } } - return 0; + return ret; } -EXPORT_SYMBOL_GPL(ata_acpi_cbl_80wire); +EXPORT_SYMBOL_GPL(ata_acpi_cbl_pata_type); static void ata_acpi_gtf_to_tf(struct ata_device *dev, const struct ata_acpi_gtf *gtf, diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index b811efd2cc34..73e81e160c91 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -27,7 +27,7 @@ #include <scsi/scsi_host.h> #include <linux/dmi.h> -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86) && defined(CONFIG_X86_32) #include <asm/msr.h> static int use_msr; module_param_named(msr, use_msr, int, 0644); diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index fbf5f07ea357..f7a933eefe05 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1298,7 +1298,7 @@ static int pata_macio_pci_attach(struct pci_dev *pdev, priv->dev = &pdev->dev; /* Get MMIO regions */ - if (pci_request_regions(pdev, "pata-macio")) { + if (pcim_request_all_regions(pdev, "pata-macio")) { dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); return -EBUSY; diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 696b99720dcb..bb80e7800dcb 100644 --- a/drivers/ata/pata_via.c +++ 
b/drivers/ata/pata_via.c @@ -201,11 +201,9 @@ static int via_cable_detect(struct ata_port *ap) { two drives */ if (ata66 & (0x10100000 >> (16 * ap->port_no))) return ATA_CBL_PATA80; + /* Check with ACPI so we can spot BIOS reported SATA bridges */ - if (ata_acpi_init_gtm(ap) && - ata_acpi_cbl_80wire(ap, ata_acpi_init_gtm(ap))) - return ATA_CBL_PATA80; - return ATA_CBL_PATA40; + return ata_acpi_cbl_pata_type(ap); } static int via_pre_reset(struct ata_link *link, unsigned long deadline) @@ -368,7 +366,8 @@ static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask) } if (dev->class == ATA_DEV_ATAPI && - dmi_check_system(no_atapi_dma_dmi_table)) { + (dmi_check_system(no_atapi_dma_dmi_table) || + config->id == PCI_DEVICE_ID_VIA_6415)) { ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); mask &= ATA_MASK_PIO; } diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index d4aa0f353b6c..eeae160c898d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -288,7 +288,9 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (!skb->len) return 0; + if (skb->len < sizeof(struct atmtcp_hdr)) + goto done; + dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 1206ab764ba9..f2e91b7d79f0 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -852,6 +852,8 @@ queue_skb(struct idt77252_dev *card, struct vc_map *vc, IDT77252_PRV_PADDR(skb) = dma_map_single(&card->pcidev->dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, IDT77252_PRV_PADDR(skb))) + return -ENOMEM; error = -EINVAL; @@ -1857,6 +1859,8 @@ add_rx_skb(struct idt77252_dev *card, int queue, paddr = dma_map_single(&card->pcidev->dev, skb->data, skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); + if (dma_mapping_error(&card->pcidev->dev, paddr)) + goto 
outpoolrm; IDT77252_PRV_PADDR(skb) = paddr; if (push_rx_skb(card, skb, queue)) { @@ -1871,6 +1875,7 @@ outunmap: dma_unmap_single(&card->pcidev->dev, IDT77252_PRV_PADDR(skb), skb_end_pointer(skb) - skb->data, DMA_FROM_DEVICE); +outpoolrm: handle = IDT77252_PRV_POOL(skb); card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL; diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7779ab0ca7ce..efc575a00edd 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -602,6 +602,7 @@ CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); CPU_SHOW_VULN_FALLBACK(old_microcode); CPU_SHOW_VULN_FALLBACK(indirect_target_selection); +CPU_SHOW_VULN_FALLBACK(tsa); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -620,6 +621,7 @@ static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); static DEVICE_ATTR(old_microcode, 0444, cpu_show_old_microcode, NULL); static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); +static DEVICE_ATTR(tsa, 0444, cpu_show_tsa, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -639,6 +641,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_ghostwrite.attr, &dev_attr_old_microcode.attr, &dev_attr_indirect_target_selection.attr, + &dev_attr_tsa.attr, NULL }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index eebe699fdf4f..a6ab666ef48a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1237,6 +1237,7 @@ void dpm_complete(pm_message_t state) void dpm_resume_end(pm_message_t state) { dpm_resume(state); + pm_restore_gfp_mask(); dpm_complete(state); } EXPORT_SYMBOL_GPL(dpm_resume_end); @@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state) error = dpm_prepare(state); if (error) 
dpm_save_failed_step(SUSPEND_PREPARE); - else + else { + pm_restrict_gfp_mask(); error = dpm_suspend(state); + } dpm_show_time(starttime, state, error, "start"); return error; diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 749ae1246f4c..d35caa3c69e1 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -80,6 +80,7 @@ enum { DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ DEVFL_FREED = (1<<8), /* device has been cleaned up */ + DEVFL_DEAD = (1<<9), /* device has timed out of aoe_deadsecs */ }; enum { diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 50cc90f6ab35..6298f8e271e3 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -754,7 +754,7 @@ rexmit_timer(struct timer_list *timer) utgts = count_targets(d, NULL); - if (d->flags & DEVFL_TKILL) { + if (d->flags & (DEVFL_TKILL | DEVFL_DEAD)) { spin_unlock_irqrestore(&d->lock, flags); return; } @@ -786,7 +786,8 @@ rexmit_timer(struct timer_list *timer) * to clean up. 
*/ list_splice(&flist, &d->factive[0]); - aoedev_downdev(d); + d->flags |= DEVFL_DEAD; + queue_work(aoe_wq, &d->work); goto out; } @@ -898,6 +899,9 @@ aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); + if (d->flags & DEVFL_DEAD) + aoedev_downdev(d); + if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index bba05f0c5bbd..3a240755045b 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -198,9 +198,13 @@ aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; struct list_head *head, *pos, *nx; + struct request *rq, *rqnext; int i; + unsigned long flags; - d->flags &= ~DEVFL_UP; + spin_lock_irqsave(&d->lock, flags); + d->flags &= ~(DEVFL_UP | DEVFL_DEAD); + spin_unlock_irqrestore(&d->lock, flags); /* clean out active and to-be-retransmitted buffers */ for (i = 0; i < NFACTIVE; i++) { @@ -223,6 +227,13 @@ aoedev_downdev(struct aoedev *d) /* clean out the in-process request (if any) */ aoe_failip(d); + /* clean out any queued block requests */ + list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + blk_mq_end_request(rq, BLK_STS_IOERR); + } + /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to insure all are errored */ diff --git a/drivers/block/brd.c b/drivers/block/brd.c index b1be6c510372..0c2eabe14af3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -64,13 +64,15 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector, rcu_read_unlock(); page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); - rcu_read_lock(); - if (!page) + if (!page) { + rcu_read_lock(); return ERR_PTR(-ENOMEM); + } xa_lock(&brd->brd_pages); ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL, page, gfp); + rcu_read_lock(); if (ret) { xa_unlock(&brd->brd_pages); __free_page(page); diff --git 
a/drivers/block/nbd.c b/drivers/block/nbd.c index 7bdc7eb808ea..2592bd19ebc1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2198,9 +2198,7 @@ again: goto out; } } - ret = nbd_start_device(nbd); - if (ret) - goto out; + if (info->attrs[NBD_ATTR_BACKEND_IDENTIFIER]) { nbd->backend = nla_strdup(info->attrs[NBD_ATTR_BACKEND_IDENTIFIER], GFP_KERNEL); @@ -2216,6 +2214,8 @@ again: goto out; } set_bit(NBD_RT_HAS_BACKEND_FILE, &config->runtime_flags); + + ret = nbd_start_device(nbd); out: mutex_unlock(&nbd->config_lock); if (!ret) { diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c637ea010d34..9fd284fa76dc 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1148,8 +1148,8 @@ exit: blk_mq_end_request(req, res); } -static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, - int res, unsigned issue_flags) +static struct io_uring_cmd *__ublk_prep_compl_io_cmd(struct ublk_io *io, + struct request *req) { /* read cmd first because req will overwrite it */ struct io_uring_cmd *cmd = io->cmd; @@ -1164,6 +1164,13 @@ static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, io->flags &= ~UBLK_IO_FLAG_ACTIVE; io->req = req; + return cmd; +} + +static void ublk_complete_io_cmd(struct ublk_io *io, struct request *req, + int res, unsigned issue_flags) +{ + struct io_uring_cmd *cmd = __ublk_prep_compl_io_cmd(io, req); /* tell ublksrv one io request is coming */ io_uring_cmd_done(cmd, res, 0, issue_flags); @@ -1416,6 +1423,14 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static inline bool ublk_belong_to_same_batch(const struct ublk_io *io, + const struct ublk_io *io2) +{ + return (io_uring_cmd_ctx_handle(io->cmd) == + io_uring_cmd_ctx_handle(io2->cmd)) && + (io->task == io2->task); +} + static void ublk_queue_rqs(struct rq_list *rqlist) { struct rq_list requeue_list = { }; @@ -1427,14 +1442,16 @@ static void ublk_queue_rqs(struct rq_list *rqlist) struct 
ublk_queue *this_q = req->mq_hctx->driver_data; struct ublk_io *this_io = &this_q->ios[req->tag]; - if (io && io->task != this_io->task && !rq_list_empty(&submit_list)) + if (ublk_prep_req(this_q, req, true) != BLK_STS_OK) { + rq_list_add_tail(&requeue_list, req); + continue; + } + + if (io && !ublk_belong_to_same_batch(io, this_io) && + !rq_list_empty(&submit_list)) ublk_queue_cmd_list(io, &submit_list); io = this_io; - - if (ublk_prep_req(this_q, req, true) == BLK_STS_OK) - rq_list_add_tail(&submit_list, req); - else - rq_list_add_tail(&requeue_list, req); + rq_list_add_tail(&submit_list, req); } if (!rq_list_empty(&submit_list)) @@ -2148,10 +2165,9 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq, return 0; } -static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io) +static bool ublk_get_data(const struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) { - struct request *req = io->req; - /* * We have handled UBLK_IO_NEED_GET_DATA command, * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just @@ -2178,6 +2194,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, u32 cmd_op = cmd->cmd_op; unsigned tag = ub_cmd->tag; int ret = -EINVAL; + struct request *req; pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", __func__, cmd->cmd_op, ub_cmd->q_id, tag, @@ -2236,11 +2253,19 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, goto out; break; case UBLK_IO_NEED_GET_DATA: - io->addr = ub_cmd->addr; - if (!ublk_get_data(ubq, io)) - return -EIOCBQUEUED; - - return UBLK_IO_RES_OK; + /* + * ublk_get_data() may fail and fallback to requeue, so keep + * uring_cmd active first and prepare for handling new requeued + * request + */ + req = io->req; + ublk_fill_io_cmd(io, cmd, ub_cmd->addr); + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + if (likely(ublk_get_data(ubq, io, req))) { + __ublk_prep_compl_io_cmd(io, req); + return UBLK_IO_RES_OK; + } + break; default: goto out; } @@ -2825,6 +2850,10 @@ static int 
ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || !info.queue_depth || + info.nr_hw_queues > UBLK_MAX_NR_QUEUES || !info.nr_hw_queues) + return -EINVAL; + if (capable(CAP_SYS_ADMIN)) info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 563165c5efae..e1c688dd2d45 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -2033,6 +2033,28 @@ static void btintel_pcie_release_hdev(struct btintel_pcie_data *data) data->hdev = NULL; } +static void btintel_pcie_disable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_enable_interrupts(struct btintel_pcie_data *data) +{ + spin_lock(&data->irq_lock); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, ~data->fh_init_mask); + btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, ~data->hw_init_mask); + spin_unlock(&data->irq_lock); +} + +static void btintel_pcie_synchronize_irqs(struct btintel_pcie_data *data) +{ + for (int i = 0; i < data->alloc_vecs; i++) + synchronize_irq(data->msix_entries[i].vector); +} + static int btintel_pcie_setup_internal(struct hci_dev *hdev) { struct btintel_pcie_data *data = hci_get_drvdata(hdev); @@ -2152,6 +2174,8 @@ static int btintel_pcie_setup(struct hci_dev *hdev) bt_dev_err(hdev, "Firmware download retry count: %d", fw_dl_retry); btintel_pcie_dump_debug_registers(hdev); + btintel_pcie_disable_interrupts(data); + btintel_pcie_synchronize_irqs(data); err = btintel_pcie_reset_bt(data); if (err) { bt_dev_err(hdev, "Failed to do shr reset: %d", 
err); @@ -2159,6 +2183,7 @@ static int btintel_pcie_setup(struct hci_dev *hdev) } usleep_range(10000, 12000); btintel_pcie_reset_ia(data); + btintel_pcie_enable_interrupts(data); btintel_pcie_config_msix(data); err = btintel_pcie_enable_bt(data); if (err) { @@ -2291,6 +2316,12 @@ static void btintel_pcie_remove(struct pci_dev *pdev) data = pci_get_drvdata(pdev); + btintel_pcie_disable_interrupts(data); + + btintel_pcie_synchronize_irqs(data); + + flush_work(&data->rx_work); + btintel_pcie_reset_bt(data); for (int i = 0; i < data->alloc_vecs; i++) { struct msix_entry *msix_entry; @@ -2303,8 +2334,6 @@ static void btintel_pcie_remove(struct pci_dev *pdev) btintel_pcie_release_hdev(data); - flush_work(&data->rx_work); - destroy_workqueue(data->workqueue); btintel_pcie_free(data); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 5fe5879881f5..3ec0be496820 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2392,10 +2392,17 @@ static int qca_serdev_probe(struct serdev_device *serdev) */ qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + /* + * Some modules have BT_EN enabled via a hardware pull-up, + * meaning it is not defined in the DTS and is not controlled + * through the power sequence. In such cases, fall through + * to follow the legacy flow. 
+ */ + if (IS_ERR(qcadev->bt_power->pwrseq)) + qcadev->bt_power->pwrseq = NULL; + else + break; } fallthrough; case QCA_WCN3950: diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index bf490967241a..2505df1f4e69 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -720,11 +720,6 @@ static const struct pci_device_id agp_amd64_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table); -static const struct pci_device_id agp_amd64_pci_promisc_table[] = { - { PCI_DEVICE_CLASS(0, 0) }, - { } -}; - static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume); static struct pci_driver agp_amd64_pci_driver = { @@ -739,6 +734,7 @@ static struct pci_driver agp_amd64_pci_driver = { /* Not static due to IOMMU code calling it early. */ int __init agp_amd64_init(void) { + struct pci_dev *pdev = NULL; int err = 0; if (agp_off) @@ -767,9 +763,13 @@ int __init agp_amd64_init(void) } /* Look for any AGP bridge */ - agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table; - err = driver_attach(&agp_amd64_pci_driver.driver); - if (err == 0 && agp_bridges_found == 0) { + for_each_pci_dev(pdev) + if (pci_find_capability(pdev, PCI_CAP_ID_AGP)) + pci_add_dynid(&agp_amd64_pci_driver, + pdev->vendor, pdev->device, + pdev->subsystem_vendor, + pdev->subsystem_device, 0, 0, 0); + if (agp_bridges_found == 0) { pci_unregister_driver(&agp_amd64_pci_driver); err = -ENODEV; } diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c index 15510c2ff21c..1b1561c84127 100644 --- a/drivers/clk/clk-scmi.c +++ b/drivers/clk/clk-scmi.c @@ -404,6 +404,7 @@ static int scmi_clocks_probe(struct scmi_device *sdev) const struct scmi_handle *handle = sdev->handle; struct scmi_protocol_handle *ph; const struct clk_ops *scmi_clk_ops_db[SCMI_MAX_CLK_OPS] = {}; + struct scmi_clk *sclks; if (!handle) return -ENODEV; @@ -430,18 +431,21 @@ static int scmi_clocks_probe(struct scmi_device *sdev) transport_is_atomic = 
handle->is_transport_atomic(handle, &atomic_threshold_us); + sclks = devm_kcalloc(dev, count, sizeof(*sclks), GFP_KERNEL); + if (!sclks) + return -ENOMEM; + + for (idx = 0; idx < count; idx++) + hws[idx] = &sclks[idx].hw; + for (idx = 0; idx < count; idx++) { - struct scmi_clk *sclk; + struct scmi_clk *sclk = &sclks[idx]; const struct clk_ops *scmi_ops; - sclk = devm_kzalloc(dev, sizeof(*sclk), GFP_KERNEL); - if (!sclk) - return -ENOMEM; - sclk->info = scmi_proto_clk_ops->info_get(ph, idx); if (!sclk->info) { dev_dbg(dev, "invalid clock info for idx %d\n", idx); - devm_kfree(dev, sclk); + hws[idx] = NULL; continue; } @@ -479,13 +483,11 @@ static int scmi_clocks_probe(struct scmi_device *sdev) if (err) { dev_err(dev, "failed to register clock %d\n", idx); devm_kfree(dev, sclk->parent_data); - devm_kfree(dev, sclk); hws[idx] = NULL; } else { dev_dbg(dev, "Registered clock:%s%s\n", sclk->info->name, scmi_ops->enable ? " (atomic ops)" : ""); - hws[idx] = &sclk->hw; } } diff --git a/drivers/clk/imx/clk-imx95-blk-ctl.c b/drivers/clk/imx/clk-imx95-blk-ctl.c index 25974947ad0c..cc2ee2be1819 100644 --- a/drivers/clk/imx/clk-imx95-blk-ctl.c +++ b/drivers/clk/imx/clk-imx95-blk-ctl.c @@ -219,11 +219,15 @@ static const struct imx95_blk_ctl_dev_data lvds_csr_dev_data = { .clk_reg_offset = 0, }; +static const char * const disp_engine_parents[] = { + "videopll1", "dsi_pll", "ldb_pll_div7" +}; + static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { [IMX95_CLK_DISPMIX_ENG0_SEL] = { .name = "disp_engine0_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", "ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 0, .bit_width = 2, @@ -232,8 +236,8 @@ static const struct imx95_blk_ctl_clk_dev_data dispmix_csr_clk_dev_data[] = { }, [IMX95_CLK_DISPMIX_ENG1_SEL] = { .name = "disp_engine1_sel", - .parent_names = (const char *[]){"videopll1", "dsi_pll", 
"ldb_pll_div7", }, - .num_parents = 4, + .parent_names = disp_engine_parents, + .num_parents = ARRAY_SIZE(disp_engine_parents), .reg = 0, .bit_idx = 2, .bit_width = 2, diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 2cbc664e5d62..623aaa4439c4 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) { struct cxl_mailbox *cxl_mbox; - u8 min_scrub_cycle = U8_MAX; struct cxl_region_params *p; struct cxl_memdev *cxlmd; struct cxl_region *cxlr; + u8 min_scrub_cycle = 0; int i, ret; if (!cxl_ps_ctx->cxlr) { @@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, if (ret) return ret; + /* + * The min_scrub_cycle of a region is the max of minimum scrub + * cycles supported by memdevs that back the region. + */ if (min_cycle) - min_scrub_cycle = min(*min_cycle, min_scrub_cycle); + min_scrub_cycle = max(*min_cycle, min_scrub_cycle); } if (min_cycle) @@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_gen_media, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt) old_rec = xa_store(&array_rec->rec_dram, le64_to_cpu(rec->media_hdr.phys_addr), rec, GFP_KERNEL); - if (xa_is_err(old_rec)) + if (xa_is_err(old_rec)) { + kfree(rec); return xa_err(old_rec); + } kfree(old_rec); @@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd, attrbs.bank = ctx->bank; break; case EDAC_REPAIR_RANK_SPARING: - attrbs.repair_type = CXL_BANK_SPARING; + attrbs.repair_type = CXL_RANK_SPARING; break; default: return NULL; diff --git a/drivers/cxl/core/features.c 
b/drivers/cxl/core/features.c index 6f2eae1eb126..7c750599ea69 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs, u32 flags; if (rpc_in->op_size < sizeof(uuid_t)) - return ERR_PTR(-EINVAL); + return false; feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid); if (IS_ERR(feat)) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 485a831695c7..2731ba3a0799 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, ras_cap.header_log); } -static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - struct cxl_dev_state *cxlds; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(cxlmd, status); } -static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev, - struct cxl_ras_capability_regs ras_cap) +static void +cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, + struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; - struct cxl_dev_state *cxlds; u32 fe; - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - if (hweight32(status) > 1) fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, ras_cap.cap_control)); else fe = status; - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, + trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, ras_cap.header_log); } +static int match_memdev_by_parent(struct device *dev, const void *uport) +{ + if (is_cxl_memdev(dev) && dev->parent == uport) + return 1; + return 0; +} + static void cxl_cper_handle_prot_err(struct 
cxl_cper_prot_err_work_data *data) { unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device, @@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment, data->prot_err.agent_addr.bus, devfn); + struct cxl_memdev *cxlmd; int port_type; if (!pdev) return; - guard(device)(&pdev->dev); - port_type = pci_pcie_type(pdev); if (port_type == PCI_EXP_TYPE_ROOT_PORT || port_type == PCI_EXP_TYPE_DOWNSTREAM || @@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data) return; } + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + struct device *mem_dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, pdev, match_memdev_by_parent); + if (!mem_dev) + return; + + cxlmd = to_cxl_memdev(mem_dev); if (data->severity == AER_CORRECTABLE) - cxl_cper_trace_corr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap); else - cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap); + cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap); } static void cxl_cper_prot_err_work_fn(struct work_struct *work) diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b1ef4546346d..bea3e9858aca 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -685,11 +685,13 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { - ret = dma_fence_wait_timeout(fence, intr, ret); - if (ret <= 0) { - dma_resv_iter_end(&cursor); - return ret; - } + ret = dma_fence_wait_timeout(fence, intr, timeout); + if (ret <= 0) + break; + + /* Even for zero timeout the return value is 1 */ + if (timeout) + timeout = ret; } dma_resv_iter_end(&cursor); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 58b1482a0fbb..07f1e9dc1ca7 100644 --- 
a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_PRIMARY; - /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_SECONDARY; + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; @@ -1230,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } -static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, - int csrow_nr, int dimm) +static int calculate_cs_size(u32 mask, unsigned int cs_mode) { - u32 msb, weight, num_zero_bits; - u32 addr_mask_deinterleaved; - int size = 0; + int msb, weight, num_zero_bits; + u32 deinterleaved_mask; + + if (!mask) + return 0; /* * The number of zero bits in the mask is equal to the number of bits @@ -1248,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, * without swapping with the most significant bit. This can be handled * by keeping the MSB where it is and ignoring the single zero bit. */ - msb = fls(addr_mask_orig) - 1; - weight = hweight_long(addr_mask_orig); + msb = fls(mask) - 1; + weight = hweight_long(mask); num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. 
*/ - addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + deinterleaved_mask = GENMASK(msb - num_zero_bits, 1); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask); + + return (deinterleaved_mask >> 2) + 1; +} + +static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec, + unsigned int cs_mode, int csrow_nr, int dimm) +{ + int size; edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); - edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); - edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask); /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - size = (addr_mask_deinterleaved >> 2) + 1; + size = calculate_cs_size(addr_mask, cs_mode); + + edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec); + size += calculate_cs_size(addr_mask_sec, cs_mode); /* Return size in MBs. */ return size >> 10; @@ -1269,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode, static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { + u32 addr_mask = 0, addr_mask_sec = 0; int cs_mask_nr = csrow_nr; - u32 addr_mask_orig; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -1308,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, if (!pvt->flags.zn_regs_v2) cs_mask_nr >>= 1; - /* Asymmetric dual-rank DIMM support. 
*/ - if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - else - addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; + if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY)) + addr_mask = pvt->csels[umc].csmasks[cs_mask_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm); + if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY)) + addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr]; + + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm); } static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) @@ -3512,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err) static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr]; + u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr]; - return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1); + return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1); } static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) @@ -3879,6 +3894,7 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x70 ... 0x7f: pvt->ctl_name = "F19h_M70h"; + pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 
0x9f: diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c index 1d51838a60c1..51c451c7f0f0 100755 --- a/drivers/edac/ecs.c +++ b/drivers/edac/ecs.c @@ -170,8 +170,10 @@ static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru); fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru); - for (i = 0; i < ECS_MAX_ATTRS; i++) + for (i = 0; i < ECS_MAX_ATTRS; i++) { + sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr); fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr; + } sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru); group->name = fru_ctx->name; diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 1930dc00c791..1cb5c67e78ae 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static const struct res_config { +static struct res_config { bool machine_check; /* The number of present memory controllers. 
*/ int num_imc; @@ -479,7 +479,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static const struct res_config ehl_cfg = { +static struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -489,7 +489,7 @@ static const struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config icl_cfg = { +static struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -499,7 +499,7 @@ static const struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config tgl_cfg = { +static struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -513,7 +513,7 @@ static const struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static const struct res_config adl_cfg = { +static struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -524,7 +524,7 @@ static const struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config adl_n_cfg = { +static struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -535,7 +535,7 @@ static const struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config rpl_p_cfg = { +static struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -547,7 +547,7 @@ static const struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_ps_cfg = { +static struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -558,7 +558,7 @@ static const struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_p_cfg = { +static struct res_config mtl_p_cfg 
= { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -569,7 +569,7 @@ static const struct res_config mtl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct pci_device_id igen6_pci_tbl[] = { +static struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, @@ -1350,9 +1350,11 @@ static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) return -ENODEV; } - if (lmc < res_cfg->num_imc) + if (lmc < res_cfg->num_imc) { igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.", res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } return 0; diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c index d1a8caa85369..70a033a76233 100755 --- a/drivers/edac/mem_repair.c +++ b/drivers/edac/mem_repair.c @@ -333,6 +333,7 @@ static int mem_repair_create_desc(struct device *dev, for (i = 0; i < MR_MAX_ATTRS; i++) { memcpy(&ctx->mem_repair_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr); ctx->mem_repair_attrs[i] = &ctx->mem_repair_dev_attr[i].dev_attr.attr; } diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c index e421d3ebd959..f9d02af2fc3a 100755 --- a/drivers/edac/scrub.c +++ b/drivers/edac/scrub.c @@ -176,6 +176,7 @@ static int scrub_create_desc(struct device *scrub_dev, group = &scrub_ctx->group; for (i = 0; i < SCRUB_MAX_ATTRS; i++) { memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i])); + sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr); scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr; } sprintf(scrub_ctx->name, "%s%d", "scrub", instance); diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index fe55613a8ea9..37eb2e6c2f9f 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ 
b/drivers/firmware/arm_ffa/driver.c @@ -110,7 +110,7 @@ struct ffa_drv_info { struct work_struct sched_recv_irq_work; struct xarray partition_info; DECLARE_HASHTABLE(notifier_hash, ilog2(FFA_MAX_NOTIFICATIONS)); - struct mutex notify_lock; /* lock to protect notifier hashtable */ + rwlock_t notify_lock; /* lock to protect notifier hashtable */ }; static struct ffa_drv_info *drv_info; @@ -1250,13 +1250,12 @@ notifier_hnode_get_by_type(u16 notify_id, enum notify_type type) return NULL; } -static int -update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, - void *cb_data, bool is_registration, bool is_framework) +static int update_notifier_cb(struct ffa_device *dev, int notify_id, + struct notifier_cb_info *cb, bool is_framework) { struct notifier_cb_info *cb_info = NULL; enum notify_type type = ffa_notify_type_get(dev->vm_id); - bool cb_found; + bool cb_found, is_registration = !!cb; if (is_framework) cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, dev->vm_id, @@ -1270,20 +1269,10 @@ update_notifier_cb(struct ffa_device *dev, int notify_id, void *cb, return -EINVAL; if (is_registration) { - cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); - if (!cb_info) - return -ENOMEM; - - cb_info->dev = dev; - cb_info->cb_data = cb_data; - if (is_framework) - cb_info->fwk_cb = cb; - else - cb_info->cb = cb; - - hash_add(drv_info->notifier_hash, &cb_info->hnode, notify_id); + hash_add(drv_info->notifier_hash, &cb->hnode, notify_id); } else { hash_del(&cb_info->hnode); + kfree(cb_info); } return 0; @@ -1300,20 +1289,19 @@ static int __ffa_notify_relinquish(struct ffa_device *dev, int notify_id, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + write_lock(&drv_info->notify_lock); - rc = update_notifier_cb(dev, notify_id, NULL, NULL, false, - is_framework); + rc = update_notifier_cb(dev, notify_id, NULL, is_framework); if (rc) { pr_err("Could not unregister notification callback\n"); - 
mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } if (!is_framework) rc = ffa_notification_unbind(dev->vm_id, BIT(notify_id)); - mutex_unlock(&drv_info->notify_lock); + write_unlock(&drv_info->notify_lock); return rc; } @@ -1334,6 +1322,7 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, { int rc; u32 flags = 0; + struct notifier_cb_info *cb_info = NULL; if (ffa_notifications_disabled()) return -EOPNOTSUPP; @@ -1341,28 +1330,40 @@ static int __ffa_notify_request(struct ffa_device *dev, bool is_per_vcpu, if (notify_id >= FFA_MAX_NOTIFICATIONS) return -EINVAL; - mutex_lock(&drv_info->notify_lock); + cb_info = kzalloc(sizeof(*cb_info), GFP_KERNEL); + if (!cb_info) + return -ENOMEM; + + cb_info->dev = dev; + cb_info->cb_data = cb_data; + if (is_framework) + cb_info->fwk_cb = cb; + else + cb_info->cb = cb; + + write_lock(&drv_info->notify_lock); if (!is_framework) { if (is_per_vcpu) flags = PER_VCPU_NOTIFICATION_FLAG; rc = ffa_notification_bind(dev->vm_id, BIT(notify_id), flags); - if (rc) { - mutex_unlock(&drv_info->notify_lock); - return rc; - } + if (rc) + goto out_unlock_free; } - rc = update_notifier_cb(dev, notify_id, cb, cb_data, true, - is_framework); + rc = update_notifier_cb(dev, notify_id, cb_info, is_framework); if (rc) { pr_err("Failed to register callback for %d - %d\n", notify_id, rc); if (!is_framework) ffa_notification_unbind(dev->vm_id, BIT(notify_id)); } - mutex_unlock(&drv_info->notify_lock); + +out_unlock_free: + write_unlock(&drv_info->notify_lock); + if (rc) + kfree(cb_info); return rc; } @@ -1406,9 +1407,9 @@ static void handle_notif_callbacks(u64 bitmap, enum notify_type type) if (!(bitmap & 1)) continue; - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_type(notify_id, type); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->cb) cb_info->cb(notify_id, cb_info->cb_data); 
@@ -1446,9 +1447,9 @@ static void handle_fwk_notif_callbacks(u32 bitmap) ffa_rx_release(); - mutex_lock(&drv_info->notify_lock); + read_lock(&drv_info->notify_lock); cb_info = notifier_hnode_get_by_vmid_uuid(notify_id, target, &uuid); - mutex_unlock(&drv_info->notify_lock); + read_unlock(&drv_info->notify_lock); if (cb_info && cb_info->fwk_cb) cb_info->fwk_cb(notify_id, cb_info->cb_data, buf); @@ -1973,7 +1974,7 @@ static void ffa_notifications_setup(void) goto cleanup; hash_init(drv_info->notifier_hash); - mutex_init(&drv_info->notify_lock); + rwlock_init(&drv_info->notify_lock); drv_info->notif_enabled = true; return; diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds index c3a166675450..367907eb7d86 100644 --- a/drivers/firmware/efi/libstub/zboot.lds +++ b/drivers/firmware/efi/libstub/zboot.lds @@ -29,14 +29,12 @@ SECTIONS . = _etext; } -#ifdef CONFIG_EFI_SBAT .sbat : ALIGN(4096) { _sbat = .; *(.sbat) _esbat = ALIGN(4096); . = _esbat; } -#endif .data : ALIGN(4096) { _data = .; @@ -60,6 +58,6 @@ SECTIONS PROVIDE(__efistub__gzdata_size = ABSOLUTE(__efistub__gzdata_end - __efistub__gzdata_start)); -PROVIDE(__data_rawsize = ABSOLUTE(_edata - _etext)); -PROVIDE(__data_size = ABSOLUTE(_end - _etext)); +PROVIDE(__data_rawsize = ABSOLUTE(_edata - _data)); +PROVIDE(__data_size = ABSOLUTE(_end - _data)); PROVIDE(__sbat_size = ABSOLUTE(_esbat - _sbat)); diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index e02f14f4bd7c..3a69fe3234c7 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -430,6 +430,9 @@ int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) return -EOPNOTSUPP; } + msg.chan_id = xfer->acpm_chan_id; + msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; + scoped_guard(mutex, &achan->tx_lock) { tx_front = readl(achan->tx.front); idx = (tx_front + 1) % achan->qlen; @@ -446,25 +449,15 @@ int acpm_do_xfer(const 
struct acpm_handle *handle, const struct acpm_xfer *xfer) /* Advance TX front. */ writel(idx, achan->tx.front); - } - msg.chan_id = xfer->acpm_chan_id; - msg.chan_type = EXYNOS_MBOX_CHAN_TYPE_DOORBELL; - ret = mbox_send_message(achan->chan, (void *)&msg); - if (ret < 0) - return ret; - - ret = acpm_wait_for_message_response(achan, xfer); + ret = mbox_send_message(achan->chan, (void *)&msg); + if (ret < 0) + return ret; - /* - * NOTE: we might prefer not to need the mailbox ticker to manage the - * transfer queueing since the protocol layer queues things by itself. - * Unfortunately, we have to kick the mailbox framework after we have - * received our message. - */ - mbox_client_txdone(achan->chan, ret); + mbox_client_txdone(achan->chan, 0); + } - return ret; + return acpm_wait_for_message_response(achan, xfer); } /** diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index 26227669f026..70a01c5b8ad1 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -268,7 +268,7 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data0 = { /* LS7A2000 ACPI GPIO */ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data1 = { .label = "ls7a2000_gpio", - .mode = BYTE_CTRL_MODE, + .mode = BIT_CTRL_MODE, .conf_offset = 0x4, .in_offset = 0x8, .out_offset = 0x0, diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 10ea71273c89..9875e34bde72 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -190,7 +190,9 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) struct mlxbf3_gpio_context *gs; struct gpio_irq_chip *girq; struct gpio_chip *gc; + char *colon_ptr; int ret, irq; + long num; gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL); if (!gs) @@ -227,25 +229,39 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - irq = platform_get_irq(pdev, 0); - 
if (irq >= 0) { - girq = &gs->gc.irq; - gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); - girq->default_type = IRQ_TYPE_NONE; - /* This will let us handle the parent IRQ in the driver */ - girq->num_parents = 0; - girq->parents = NULL; - girq->parent_handler = NULL; - girq->handler = handle_bad_irq; - - /* - * Directly request the irq here instead of passing - * a flow-handler because the irq is shared. - */ - ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, - IRQF_SHARED, dev_name(dev), gs); - if (ret) - return dev_err_probe(dev, ret, "failed to request IRQ"); + colon_ptr = strchr(dev_name(dev), ':'); + if (!colon_ptr) { + dev_err(dev, "invalid device name format\n"); + return -EINVAL; + } + + ret = kstrtol(++colon_ptr, 16, &num); + if (ret) { + dev_err(dev, "invalid device instance\n"); + return ret; + } + + if (!num) { + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + girq = &gs->gc.irq; + gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); + girq->default_type = IRQ_TYPE_NONE; + /* This will let us handle the parent IRQ in the driver */ + girq->num_parents = 0; + girq->parents = NULL; + girq->parent_handler = NULL; + girq->handler = handle_bad_irq; + + /* + * Directly request the irq here instead of passing + * a flow-handler because the irq is shared. 
+ */ + ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, + IRQF_SHARED, dev_name(dev), gs); + if (ret) + return dev_err_probe(dev, ret, "failed to request IRQ"); + } } platform_set_drvdata(pdev, gs); diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index b852e4997629..e80a96f39788 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -974,7 +974,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) IRQF_ONESHOT | IRQF_SHARED, dev_name(dev), chip); if (ret) - return dev_err_probe(dev, client->irq, "failed to request irq\n"); + return dev_err_probe(dev, ret, "failed to request irq\n"); return 0; } diff --git a/drivers/gpio/gpio-spacemit-k1.c b/drivers/gpio/gpio-spacemit-k1.c index f027066365ff..3cc75c701ec4 100644 --- a/drivers/gpio/gpio-spacemit-k1.c +++ b/drivers/gpio/gpio-spacemit-k1.c @@ -278,6 +278,7 @@ static const struct of_device_id spacemit_gpio_dt_ids[] = { { .compatible = "spacemit,k1-gpio" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, spacemit_gpio_dt_ids); static struct platform_driver spacemit_gpio_driver = { .probe = spacemit_gpio_probe, diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 73ba73b31cb1..37ab78243fab 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -708,7 +708,7 @@ struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *flags) { char propname[32]; /* 32 is max size of property name */ - enum of_gpio_flags of_flags; + enum of_gpio_flags of_flags = 0; const of_find_gpio_quirk *q; struct gpio_desc *desc; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fdafa0df1b43..3a3eca5b4c40 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3297,14 +3297,15 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { - int ret; - 
lockdep_assert_held(&gc->gpiodev->srcu); if (gc->get_multiple) { + int ret; + ret = gc->get_multiple(gc, mask, bits); if (ret > 0) return -EBADE; + return ret; } if (gc->get) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ca4a6b82817f..df77558e03ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -561,6 +561,13 @@ static uint32_t read_vmid_from_vmfault_reg(struct amdgpu_device *adev) return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -578,4 +585,5 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0f3e2944edd7..e68c0fa8d751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -582,6 +582,13 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, lower_32_bits(page_table_base)); } +static uint32_t kgd_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -599,4 +606,5 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, 
.set_vm_context_page_table_base = set_vm_context_page_table_base, + .hqd_sdma_get_doorbell = kgd_hqd_sdma_get_doorbell, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 85567d0d9545..f5d5c45ddc0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -944,6 +944,7 @@ static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) drm_sched_entity_fini(entity); } } + kref_put(&ctx->refcount, amdgpu_ctx_fini); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e626f50b362..f81608330a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ no_preempt: continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1bab6a96cb6..78f8755996f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6019,16 +6019,12 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -static int amdgpu_device_halt_activities(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context, - struct list_head *device_list, - struct amdgpu_hive_info *hive, - bool need_emergency_restart) +static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) { - struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; + int r; /* * Build list of devices to reset. 
@@ -6045,26 +6041,54 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); - device_list_handle = device_list; } else { list_add_tail(&adev->reset_list, device_list); - device_list_handle = device_list; } if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { - r = amdgpu_device_health_check(device_list_handle); + r = amdgpu_device_health_check(device_list); if (r) return r; } - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + return 0; +} + +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} - /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + +static int amdgpu_device_halt_activities( + struct amdgpu_device *adev, struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; + + /* block all schedulers and reset given job's ring */ + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -6252,11 +6276,6 @@ static void 
amdgpu_device_gpu_resume(struct amdgpu_device *adev, amdgpu_ras_set_error_query_ready(tmp_adev, true); } - - tmp_adev = list_first_entry(device_list, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); - } @@ -6324,10 +6343,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->hive = hive; INIT_LIST_HEAD(&device_list); + if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) + goto end_reset; + + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (r) - goto end_reset; + goto reset_unlock; if (need_emergency_restart) goto skip_sched_resume; @@ -6337,7 +6362,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -6345,13 +6370,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, r = amdgpu_device_asic_reset(adev, &device_list, reset_context); if (r) - goto end_reset; + goto reset_unlock; skip_hw_reset: r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); if (r) - goto end_reset; + goto reset_unlock; skip_sched_resume: amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6763,6 +6790,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); + amdgpu_device_recovery_prepare(adev, &device_list, hive); + 
amdgpu_device_recovery_get_reset_lock(adev, &device_list); r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, hive, false); if (hive) { @@ -6880,8 +6909,8 @@ out: if (hive) { list_for_each_entry(tmp_adev, &device_list, reset_list) amdgpu_device_unset_mp1_state(tmp_adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); } if (hive) { @@ -6927,6 +6956,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); + amdgpu_device_recovery_put_reset_lock(adev, &device_list); adev->pcie_reset_ctx.occurs_dpc = false; if (hive) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a0e9bf9b2710..81b3443c8d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -321,10 +321,12 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, const struct firmware *fw; int r; - r = request_firmware(&fw, fw_name, adev->dev); + r = firmware_request_nowarn(&fw, fw_name, adev->dev); if (r) { - dev_err(adev->dev, "can't load firmware \"%s\"\n", - fw_name); + if (amdgpu_discovery == 2) + dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name); + else + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name); return r; } @@ -459,16 +461,12 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) /* Read from file if it is the preferred option */ fw_name = amdgpu_discovery_get_fw_name(adev); if (fw_name != NULL) { - dev_info(adev->dev, "use ip discovery information from file"); + drm_dbg(&adev->ddev, "use ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); - - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from file\n"); - r = -EINVAL; + if (r) 
goto out; - } - } else { + drm_dbg(&adev->ddev, "use ip discovery information from memory"); r = amdgpu_discovery_read_binary_from_mem( adev, adev->mman.discovery_bin); if (r) @@ -1338,10 +1336,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) int r; r = amdgpu_discovery_init(adev); - if (r) { - DRM_ERROR("amdgpu_discovery_init failed\n"); + if (r) return r; - } wafl_ver = 0; adev->gfx.xcc_mask = 0; @@ -2579,8 +2575,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; default: r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; + if (r) { + drm_err(&adev->ddev, "discovery failed: %d\n", r); + return r; + } amdgpu_discovery_harvest_ip(adev); amdgpu_discovery_get_gfx_info(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8cecf25996ed..5fec808d7f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. 
*/ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, 
struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce..ddb9d3269357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index f2c049129661..931fed8892cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e6f0b035e20b..c14f63cefe67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3522,8 +3522,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3799,8 +3803,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b95b47110769..e1f25218943a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -127,6 +127,22 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. 
*/ + struct amdgpu_ring *ring; + ktime_t start_timestamp; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6716ac281c49..9b54a1ece447 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -540,8 +540,10 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 4): case IP_VERSION(4, 4, 5): - /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ - r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + /* For SDMA 4.x, use the existing DPM interface for backward compatibility, + * we need to convert the logical instance ID to physical instance ID before reset. + */ + r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id)); break; case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 1): @@ -568,7 +570,7 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device - * @instance_id: ID of the SDMA engine instance to reset + * @instance_id: Logical ID of the SDMA engine instance to reset * * Returns: 0 on success, or a negative error code on failure. 
*/ @@ -601,7 +603,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) /* Perform the SDMA reset for the specified instance */ ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { - dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id); goto exit; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2505c46a9c3d..eaddc441c51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,10 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -1387,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 9e89c3487be5..6349aad6da35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -605,6 +605,11 @@ struct amdgpu_firmware { uint32_t pldm_version; }; +struct kicker_device{ + unsigned short device; + u8 revision; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void 
amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); @@ -632,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index afd6d59164bf..ec9b84f92d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -85,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -759,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d377a7c57d5e..ad9be3656653 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2235,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = 
gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cfa91d709d49..cc626036ed9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index c9eba537de09..28eb846280dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1630,10 +1630,12 @@ static int 
mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } } r = mes_v11_0_query_sched_status(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index b4f17332d466..6b222630f3fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1742,7 +1742,8 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b) + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df612fd9cc50..ead616c11705 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 9c169112a5e7..bb82c652e4c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ 
-45,6 +45,7 @@ #include "amdgpu_ras.h" MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_4_4_4.bin"); MODULE_FIRMWARE("amdgpu/sdma_4_4_5.bin"); static const struct amdgpu_hwip_reg_entry sdma_reg_list_4_4_2[] = { @@ -490,7 +491,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -502,6 +503,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -995,6 +999,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { /* enable context empty interrupt during initialization */ @@ -1670,7 +1675,7 @@ static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - u32 id = GET_INST(SDMA0, ring->me); + u32 id = ring->me; int r; if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) @@ -1686,7 +1691,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 instance_id = GET_INST(SDMA0, ring->me); + u32 instance_id = ring->me; u32 
inst_mask; uint64_t rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9505ae96fbec..37f4b5b4a098 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1399,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1542,8 +1543,13 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index a6e612b4a892..0b40411b92a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1318,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1455,8 +1456,13 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 inst_id = ring->me; + int r; + + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, inst_id); + amdgpu_amdkfd_resume(adev, true); - return amdgpu_sdma_reset_engine(adev, inst_id); + return r; } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 5a70ae17be04..a9bdf8d61d6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1374,9 +1374,22 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(6, 0, 0): + if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 2): + if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + case IP_VERSION(6, 0, 3): + if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ad47d0bdf777..86903eccbd4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1349,9 +1349,15 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); - /* add firmware version checks here */ - if (0 && !adev->sdma.disable_uq) - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq) + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; + break; + default: + break; + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 
338cf43c45fe..cdefd7fcb0da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -669,6 +669,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + /* Pause dpg */ vcn_v5_0_1_pause_dpg_mode(vinst, &state); @@ -681,7 +684,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); @@ -692,6 +695,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8fa6489b6f5d..505036968a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; + packet->bitfields2.gws_control_queue = q->properties.is_gws ? 
1 : 0; packet->bitfields2.extended_engine_sel = extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 865dca2547de..a0f22ea6d15a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1171,13 +1171,12 @@ svm_range_split_head(struct svm_range *prange, uint64_t new_start, } static void -svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, - struct svm_range *pchild, enum svm_work_list_ops op) +svm_range_add_child(struct svm_range *prange, struct svm_range *pchild, enum svm_work_list_ops op) { pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", pchild, pchild->start, pchild->last, prange, op); - pchild->work_item.mm = mm; + pchild->work_item.mm = NULL; pchild->work_item.op = op; list_add_tail(&pchild->child_list, &prange->child_list); } @@ -1278,7 +1277,7 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - if (gc_ip_version < IP_VERSION(9, 5, 0)) + if (gc_ip_version < IP_VERSION(9, 5, 0) || ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_UC; else mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -2394,15 +2393,17 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, prange->work_item.op != SVM_OP_UNMAP_RANGE) prange->work_item.op = op; } else { - prange->work_item.op = op; - - /* Pairs with mmput in deferred_list_work */ - mmget(mm); - prange->work_item.mm = mm; - list_add_tail(&prange->deferred_list, - &prange->svms->deferred_range_list); - pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", - prange, prange->start, prange->last, op); + /* Pairs with mmput in deferred_list_work. + * If process is exiting and mm is gone, don't update mmu notifier. 
+ */ + if (mmget_not_zero(mm)) { + prange->work_item.mm = mm; + prange->work_item.op = op; + list_add_tail(&prange->deferred_list, + &prange->svms->deferred_range_list); + pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n", + prange, prange->start, prange->last, op); + } } spin_unlock(&svms->deferred_list_lock); } @@ -2416,8 +2417,7 @@ void schedule_deferred_list_work(struct svm_range_list *svms) } static void -svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, - struct svm_range *prange, unsigned long start, +svm_range_unmap_split(struct svm_range *parent, struct svm_range *prange, unsigned long start, unsigned long last) { struct svm_range *head; @@ -2438,12 +2438,12 @@ svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent, svm_range_split(tail, last + 1, tail->last, &head); if (head != prange && tail != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); - svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_ADD_RANGE); } else if (tail != prange) { - svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, tail, SVM_OP_UNMAP_RANGE); } else if (head != prange) { - svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE); + svm_range_add_child(parent, head, SVM_OP_UNMAP_RANGE); } else if (parent != prange) { prange->work_item.op = SVM_OP_UNMAP_RANGE; } @@ -2520,14 +2520,14 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, l = min(last, pchild->last); if (l >= s) svm_range_unmap_from_gpus(pchild, s, l, trigger); - svm_range_unmap_split(mm, prange, pchild, start, last); + svm_range_unmap_split(prange, pchild, start, last); mutex_unlock(&pchild->lock); } s = max(start, prange->start); l = min(last, prange->last); if (l >= s) svm_range_unmap_from_gpus(prange, s, l, trigger); - svm_range_unmap_split(mm, prange, prange, start, last); + 
svm_range_unmap_split(prange, prange, start, last); if (unmap_parent) svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE); @@ -2570,8 +2570,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, if (range->event == MMU_NOTIFY_RELEASE) return true; - if (!mmget_not_zero(mni->mm)) - return true; start = mni->interval_tree.start; last = mni->interval_tree.last; @@ -2598,7 +2596,6 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, } svm_range_unlock(prange); - mmput(mni->mm); return true; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index baa2374acdeb..4ec73f33535e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && + (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; + sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", dev->node_props.max_engine_clk_fcompute); @@ -2008,8 +2012,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (!amdgpu_sriov_vf(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3100f641ac6..f58fa5da7fe5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3610,13 +3610,15 @@ static 
void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) luminance_range = &conn_base->display_info.luminance_range; - if (luminance_range->max_luminance) { - caps->aux_min_input_signal = luminance_range->min_luminance; + if (luminance_range->max_luminance) caps->aux_max_input_signal = luminance_range->max_luminance; - } else { - caps->aux_min_input_signal = 0; + else caps->aux_max_input_signal = 512; - } + + if (luminance_range->min_luminance) + caps->aux_min_input_signal = luminance_range->min_luminance; + else + caps->aux_min_input_signal = 1; min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); if (min_input_signal_override >= 0) @@ -4718,9 +4720,23 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } +/* Rescale from [min..max] to [0..MAX_BACKLIGHT_LEVEL] */ +static inline u32 scale_input_to_fw(int min, int max, u64 input) +{ + return DIV_ROUND_CLOSEST_ULL(input * MAX_BACKLIGHT_LEVEL, max - min); +} + +/* Rescale from [0..MAX_BACKLIGHT_LEVEL] to [min..max] */ +static inline u32 scale_fw_to_input(int min, int max, u64 input) +{ + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), MAX_BACKLIGHT_LEVEL); +} + static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, - uint32_t *brightness) + unsigned int min, unsigned int max, + uint32_t *user_brightness) { + u32 brightness = scale_input_to_fw(min, max, *user_brightness); u8 prev_signal = 0, prev_lum = 0; int i = 0; @@ -4731,7 +4747,7 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap return; /* choose start to run less interpolation steps */ - if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + if (caps->luminance_data[caps->data_points/2].input_signal > brightness) i = caps->data_points/2; do { u8 signal = caps->luminance_data[i].input_signal; @@ -4742,17 +4758,18 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps 
*cap * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (*brightness > signal) { + if (brightness > signal) { prev_signal = signal; prev_lum = lum; i++; continue; } - if (*brightness < signal) + if (brightness < signal) lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (*brightness - prev_signal), + (brightness - prev_signal), signal - prev_signal); - *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + *user_brightness = scale_fw_to_input(min, max, + DIV_ROUND_CLOSEST(lum * brightness, 101)); return; } while (i < caps->data_points); } @@ -4765,11 +4782,10 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c if (!get_brightness_range(caps, &min, &max)) return brightness; - convert_custom_brightness(caps, &brightness); + convert_custom_brightness(caps, min, max, &brightness); - // Rescale 0..255 to min..max - return min + DIV_ROUND_CLOSEST((max - min) * brightness, - AMDGPU_MAX_BL_LEVEL); + // Rescale 0..max to min..max + return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max); } static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4782,8 +4798,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap if (brightness < min) return 0; - // Rescale min..max to 0..255 - return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), + // Rescale min..max to 0..max + return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min), max - min); } @@ -4908,7 +4924,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) struct drm_device *drm = aconnector->base.dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm; struct backlight_properties props = { 0 }; - struct amdgpu_dm_backlight_caps caps = { 0 }; + struct amdgpu_dm_backlight_caps *caps; char bl_name[16]; int min, max; @@ -4922,22 +4938,21 @@ amdgpu_dm_register_backlight_device(struct 
amdgpu_dm_connector *aconnector) return; } - amdgpu_acpi_get_backlight_caps(&caps); - if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { + caps = &dm->backlight_caps[aconnector->bl_idx]; + if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, - caps.ac_level, caps.dc_level); + caps->ac_level, caps->dc_level); } else - props.brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = MAX_BACKLIGHT_LEVEL; - if (caps.data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) + if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); - props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index d4395b92fb85..9e3e51a2dc49 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -1029,6 +1029,10 @@ enum dc_edid_status dm_helpers_read_local_edid( return EDID_NO_RESPONSE; edid = drm_edid_raw(drm_edid); // FIXME: Get rid of drm_edid_raw() + if (!edid || + edid->extensions >= sizeof(sink->dc_edid.raw_edid) / EDID_LENGTH) + return EDID_BAD_INPUT; + sink->dc_edid.length = EDID_LENGTH * (edid->extensions + 1); memmove(sink->dc_edid.raw_edid, (uint8_t *)edid, sink->dc_edid.length); diff 
--git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 56d011a1323c..b34b5b52236d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -241,6 +241,7 @@ static bool create_links( DC_LOG_DC("BIOS object table - end"); /* Create a link for each usb4 dpia port */ + dc->lowest_dpia_link_index = MAX_LINKS; for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) { struct link_init_data link_init_params = {0}; struct dc_link *link; @@ -253,6 +254,9 @@ static bool create_links( link = dc->link_srv->create_link(&link_init_params); if (link) { + if (dc->lowest_dpia_link_index > dc->link_count) + dc->lowest_dpia_link_index = dc->link_count; + dc->links[dc->link_count] = link; link->dc = dc; ++dc->link_count; @@ -6376,6 +6380,35 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) else return 0; } +/** + *********************************************************************************************** + * dc_get_host_router_index: Get index of host router from a dpia link + * + * This function return a host router index of the target link. If the target link is dpia link. + * + * @param [in] link: target link + * @param [out] host_router_index: host router index of the target link + * + * @return: true if the host router index is found and valid. 
+ * + *********************************************************************************************** + */ +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index) +{ + struct dc *dc = link->ctx->dc; + + if (link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + if (link->link_index < dc->lowest_dpia_link_index) + return false; + + *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router; + if (*host_router_index < dc->caps.num_of_host_routers) + return true; + else + return false; +} bool dc_is_cursor_limit_pending(struct dc *dc) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1d917be36fc4..f41073c0147e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -66,7 +66,8 @@ struct dmub_notification; #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 #define MAX_NUM_EDP 2 -#define MAX_HOST_ROUTERS_NUM 2 +#define MAX_HOST_ROUTERS_NUM 3 +#define MAX_DPIA_PER_HOST_ROUTER 2 /* Display Core Interfaces */ struct dc_versions { @@ -305,6 +306,8 @@ struct dc_caps { /* Conservative limit for DCC cases which require ODM4:1 to support*/ uint32_t dcc_plane_width_limit; struct dc_scl_caps scl_caps; + uint8_t num_of_host_routers; + uint8_t num_of_dpias_per_host_router; }; struct dc_bug_wa { @@ -1603,6 +1606,7 @@ struct dc { uint8_t link_count; struct dc_link *links[MAX_LINKS]; + uint8_t lowest_dpia_link_index; struct link_service *link_srv; struct dc_state *current_state; @@ -2595,6 +2599,8 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index); + /* DSC Interfaces */ #include "dc_dsc.h" diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 
0bad8304ccf6..d346f8ae1634 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1172,8 +1172,8 @@ struct dc_lttpr_caps { union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates; union dp_alpm_lttpr_cap alpm; uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1]; - uint8_t lttpr_ieee_oui[3]; - uint8_t lttpr_device_id[6]; + uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host + uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host }; struct dc_dongle_dfp_cap_ext { diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index d562ddeca512..c9f6c6275ca1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -974,6 +974,7 @@ struct dc_crtc_timing { uint32_t pix_clk_100hz; uint32_t min_refresh_in_uhz; + uint32_t max_refresh_in_uhz; uint32_t vic; uint32_t hdmi_vic; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index d47cacfdb695..2aa6d44bb359 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,6 +788,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->pixel_format = dml2_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: plane->pixel_format = dml2_444_64; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c4dad7164d31..5b62cd19d979 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4685,7 +4685,10 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 5de775fd8fce..208630754c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -953,6 +953,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p out->SourcePixelFormat[location] = dml_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: out->SourcePixelFormat[location] = dml_444_64; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e8730cc40edb..38e17b1796e1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1225,7 +1225,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) return; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if 
(!link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control && hws) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index c814d957305a..a267f574b619 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1047,6 +1047,15 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (dc->caps.sequential_ono) { update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp && + pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = false; + update_state->pg_pipe_res_update[PG_DPP][j] = false; + } + } } } @@ -1193,6 +1202,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; if (dc->caps.sequential_ono) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if (new_pipe->stream_res.dsc && !new_pipe->top_pipe && + update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) { + update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true; + update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (new_pipe->plane_res.hubp && + new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + 
update_state->pg_pipe_res_update[PG_HUBP][j] = true; + update_state->pg_pipe_res_update[PG_DPP][j] = true; + } + } + } + } + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a5127c2d47ef..0f965380a9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -385,9 +385,15 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx) bool dp_is_lttpr_present(struct dc_link *link) { /* Some sink devices report invalid LTTPR revision, so don't validate against that cap */ - return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 && + uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + bool is_lttpr_present = (lttpr_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count <= 4); + + if (lttpr_count > 0 && !is_lttpr_present) + DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. 
Assuming no LTTPR present.\n"); + + return is_lttpr_present; } /* in DP compliance test, DPR-120 may have @@ -1551,6 +1557,8 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) uint8_t lttpr_dpcd_data[10] = {0}; enum dc_status status; bool is_lttpr_present; + uint32_t lttpr_count; + uint32_t closest_lttpr_offset; /* Logic to determine LTTPR support*/ bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; @@ -1602,20 +1610,22 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && - (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { + lttpr_count == 0) { /* If you see this message consistently, either the host platform has FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. */ DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; + lttpr_count = 1; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. 
*/ is_lttpr_present = dp_is_lttpr_present(link); DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present); @@ -1623,11 +1633,25 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) if (is_lttpr_present) { CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); - core_link_read_dpcd(link, DP_LTTPR_IEEE_OUI, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), "LTTPR IEEE OUI: "); + // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR + closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count); - core_link_read_dpcd(link, DP_LTTPR_DEVICE_ID, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), "LTTPR Device ID: "); + core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); + core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); + + if (lttpr_count > 1) { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "Closest LTTPR To Host's IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "Closest LTTPR To Host's LTTPR Device ID: "); + } else { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "LTTPR IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + 
"LTTPR Device ID: "); + } } return status; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 7e0af5297dc4..51ca0b2959fc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1954,6 +1954,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; dc->config.disable_hbr_audio_dp2 = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index d96bc6cb73ad..8383e2e59be5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1885,6 +1885,9 @@ static bool dcn314_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 72c6cf047db0..e01aa2f2e13e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1894,6 +1894,9 @@ static bool dcn35_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. 
* It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 989a270f7dea..4ebe4e00a4f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1866,6 +1866,9 @@ static bool dcn351_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 48e1f234185f..db36b8f9ce65 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1867,6 +1867,9 @@ static bool dcn36_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. 
* It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 3ba9b62ba70b..250f09922d2f 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -155,6 +155,14 @@ unsigned int mod_freesync_calc_v_total_from_refresh( v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), stream->timing.h_total), 1000000); + } else if (refresh_in_uhz >= stream->timing.max_refresh_in_uhz) { + /* When the target refresh rate is the maximum panel refresh rate + * round up the vtotal value to prevent off-by-one error causing + * v_total_min to be below the panel's lower bound + */ + v_total = div64_u64(div64_u64(((unsigned long long)( + frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), + stream->timing.h_total) + (1000000 - 1), 1000000); } else { v_total = div64_u64(div64_u64(((unsigned long long)( frame_duration_in_ns) * (stream->timing.pix_clk_100hz / 10)), diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a7167668d189..1c7235935d14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -58,6 +58,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin"); +MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); @@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -103,8 +104,13 @@ int 
smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 34547edf1ee3..87f2e5ee8790 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -159,7 +159,7 @@ bool malidp_format_mod_supported(struct drm_device *drm, } if (!fourcc_mod_is_vendor(modifier, ARM)) { - DRM_ERROR("Unknown modifier (not Arm)\n"); + DRM_DEBUG_KMS("Unknown modifier (not Arm)\n"); return false; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1de832964e92..031980d8f3ab 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -29,7 +29,6 @@ */ #include <linux/delay.h> -#include <linux/export.h> #include <linux/pci.h> #include <drm/drm_atomic.h> diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index b3f588b71a7d..af6f79793407 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -64,10 +64,11 @@ struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, str adev->id = ret; adev->name = "dp_hpd_bridge"; adev->dev.parent = parent; - adev->dev.of_node = of_node_get(parent->of_node); adev->dev.release = drm_aux_hpd_bridge_release; adev->dev.platform_data = of_node_get(np); + device_set_of_node_from_dev(&adev->dev, parent); + ret = auxiliary_device_init(adev); if (ret) { of_node_put(adev->dev.platform_data); diff --git 
a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 79b009ab9396..29b0358a7b6d 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -299,6 +299,7 @@ struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; panel_bridge->bridge.ops = DRM_BRIDGE_OP_MODES; panel_bridge->bridge.type = connector_type; + panel_bridge->bridge.pre_enable_prev_first = panel->prepare_prev_first; drm_bridge_add(&panel_bridge->bridge); @@ -413,8 +414,6 @@ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, return bridge; } - bridge->pre_enable_prev_first = panel->prepare_prev_first; - *ptr = bridge; devres_add(dev, ptr); @@ -456,8 +455,6 @@ struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, if (ret) return ERR_PTR(ret); - bridge->pre_enable_prev_first = panel->prepare_prev_first; - return bridge; } EXPORT_SYMBOL(drmm_panel_bridge_add); diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c index 0014c497e3fe..bccc88d25948 100644 --- a/drivers/gpu/drm/bridge/samsung-dsim.c +++ b/drivers/gpu/drm/bridge/samsung-dsim.c @@ -1095,7 +1095,7 @@ static void samsung_dsim_send_to_fifo(struct samsung_dsim *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1293,7 +1293,7 @@ static bool samsung_dsim_transfer_finish(struct samsung_dsim *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 60224f476e1d..de9c23537465 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -348,12 +348,18 @@ static void ti_sn65dsi86_enable_comms(struct ti_sn65dsi86 *pdata, * 200 ms. We'll assume that the panel driver will have the hardcoded * delay in its prepare and always disable HPD. * - * If HPD somehow makes sense on some future panel we'll have to - * change this to be conditional on someone specifying that HPD should - * be used. + * For DisplayPort bridge type, we need HPD. So we use the bridge type + * to conditionally disable HPD. + * NOTE: The bridge type is set in ti_sn_bridge_probe() but enable_comms() + * can be called before. So for DisplayPort, HPD will be enabled once + * bridge type is set. We are using bridge type instead of "no-hpd" + * property because it is not used properly in devicetree description + * and hence is unreliable. */ - regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, - HPD_DISABLE); + + if (pdata->bridge.type != DRM_MODE_CONNECTOR_DisplayPort) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE, + HPD_DISABLE); pdata->comms_enabled = true; @@ -1195,9 +1201,14 @@ static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; - pm_runtime_get_sync(pdata->dev); + /* + * Runtime reference is grabbed in ti_sn_bridge_hpd_enable() + * as the chip won't report HPD just after being powered on. + * HPD_DEBOUNCED_STATE reflects correct state only after the + * debounce time (~100-400 ms). + */ + regmap_read(pdata->regmap, SN_HPD_DISABLE_REG, &val); - pm_runtime_put_autosuspend(pdata->dev); return val & HPD_DEBOUNCED_STATE ? 
connector_status_connected : connector_status_disconnected; @@ -1220,6 +1231,26 @@ static void ti_sn65dsi86_debugfs_init(struct drm_bridge *bridge, struct dentry * debugfs_create_file("status", 0600, debugfs, pdata, &status_fops); } +static void ti_sn_bridge_hpd_enable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + /* + * Device needs to be powered on before reading the HPD state + * for reliable hpd detection in ti_sn_bridge_detect() due to + * the high debounce time. + */ + + pm_runtime_get_sync(pdata->dev); +} + +static void ti_sn_bridge_hpd_disable(struct drm_bridge *bridge) +{ + struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); + + pm_runtime_put_autosuspend(pdata->dev); +} + static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .attach = ti_sn_bridge_attach, .detach = ti_sn_bridge_detach, @@ -1234,6 +1265,8 @@ static const struct drm_bridge_funcs ti_sn_bridge_funcs = { .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, .debugfs_init = ti_sn65dsi86_debugfs_init, + .hpd_enable = ti_sn_bridge_hpd_enable, + .hpd_disable = ti_sn_bridge_hpd_disable, }; static void ti_sn_bridge_parse_lanes(struct ti_sn65dsi86 *pdata, @@ -1321,8 +1354,26 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, pdata->bridge.type = pdata->next_bridge->type == DRM_MODE_CONNECTOR_DisplayPort ? DRM_MODE_CONNECTOR_DisplayPort : DRM_MODE_CONNECTOR_eDP; - if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) - pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT; + if (pdata->bridge.type == DRM_MODE_CONNECTOR_DisplayPort) { + pdata->bridge.ops = DRM_BRIDGE_OP_EDID | DRM_BRIDGE_OP_DETECT | + DRM_BRIDGE_OP_HPD; + /* + * If comms were already enabled they would have been enabled + * with the wrong value of HPD_DISABLE. Update it now. 
Comms + * could be enabled if anyone is holding a pm_runtime reference + * (like if a GPIO is in use). Note that in most cases nobody + * is doing AUX channel xfers before the bridge is added so + * HPD doesn't _really_ matter then. The only exception is in + * the eDP case where the panel wants to read the EDID before + * the bridge is added. We always consistently have HPD disabled + * for eDP. + */ + mutex_lock(&pdata->comms_mutex); + if (pdata->comms_enabled) + regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, + HPD_DISABLE, 0); + mutex_unlock(&pdata->comms_mutex); + }; drm_bridge_add(&pdata->bridge); diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 7d2e499ea5de..262e93e07a28 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -708,11 +708,14 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, if (bridge_connector->bridge_hdmi_audio || bridge_connector->bridge_dp_audio) { struct device *dev; + struct drm_bridge *bridge; if (bridge_connector->bridge_hdmi_audio) - dev = bridge_connector->bridge_hdmi_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_hdmi_audio; else - dev = bridge_connector->bridge_dp_audio->hdmi_audio_dev; + bridge = bridge_connector->bridge_dp_audio; + + dev = bridge->hdmi_audio_dev; ret = drm_connector_hdmi_audio_init(connector, dev, &drm_bridge_connector_hdmi_audio_funcs, diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index f2a6559a2710..dc622c78db9d 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -725,7 +725,7 @@ ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset, * monitor doesn't power down exactly after the throw away read. 
*/ if (!aux->is_remote) { - ret = drm_dp_dpcd_probe(aux, DP_DPCD_REV); + ret = drm_dp_dpcd_probe(aux, DP_LANE0_1_STATUS); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..63a70f285cce 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -862,11 +862,23 @@ EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_framebuffer_free); int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, const struct drm_framebuffer_funcs *funcs) { + unsigned int i; int ret; + bool exists; if (WARN_ON_ONCE(fb->dev != dev || !fb->format)) return -EINVAL; + for (i = 0; i < fb->format->num_planes; i++) { + if (drm_WARN_ON_ONCE(dev, fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i))) + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + if (fb->obj[i]) { + exists = drm_gem_object_handle_get_if_exists_unlocked(fb->obj[i]); + if (exists) + fb->internal_flags |= DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } + INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; @@ -875,7 +887,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); if (ret) - goto out; + goto err; mutex_lock(&dev->mode_config.fb_lock); dev->mode_config.num_fb++; @@ -883,7 +895,16 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, mutex_unlock(&dev->mode_config.fb_lock); drm_mode_object_register(dev, &fb->base); -out: + + return 0; + +err: + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) { + drm_gem_object_handle_put_unlocked(fb->obj[i]); + fb->internal_flags &= ~DRM_FRAMEBUFFER_HAS_HANDLE_REF(i); + } + } return ret; } EXPORT_SYMBOL(drm_framebuffer_init); @@ -960,6 +981,12 @@ EXPORT_SYMBOL(drm_framebuffer_unregister_private); void drm_framebuffer_cleanup(struct drm_framebuffer *fb) { struct 
drm_device *dev = fb->dev; + unsigned int i; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->internal_flags & DRM_FRAMEBUFFER_HAS_HANDLE_REF(i)) + drm_gem_object_handle_put_unlocked(fb->obj[i]); + } mutex_lock(&dev->mode_config.fb_lock); list_del(&fb->head); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 1e659d2660f7..ac0524595bd6 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -212,6 +212,46 @@ void drm_gem_private_object_fini(struct drm_gem_object *obj) } EXPORT_SYMBOL(drm_gem_private_object_fini); +static void drm_gem_object_handle_get(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + drm_WARN_ON(dev, !mutex_is_locked(&dev->object_name_lock)); + + if (obj->handle_count++ == 0) + drm_gem_object_get(obj); +} + +/** + * drm_gem_object_handle_get_if_exists_unlocked - acquire reference on user-space handle, if any + * @obj: GEM object + * + * Acquires a reference on the GEM buffer object's handle. Required to keep + * the GEM object alive. Call drm_gem_object_handle_put_if_exists_unlocked() + * to release the reference. Does nothing if the buffer object has no handle. + * + * Returns: + * True if a handle exists, or false otherwise + */ +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + + guard(mutex)(&dev->object_name_lock); + + /* + * First ref taken during GEM object creation, if any. Some + * drivers set up internal framebuffers with GEM objects that + * do not have a GEM handle. Hence, this counter can be zero. + */ + if (!obj->handle_count) + return false; + + drm_gem_object_handle_get(obj); + + return true; +} + /** * drm_gem_object_handle_free - release resources bound to userspace handles * @obj: GEM object to clean up. 
@@ -242,20 +282,26 @@ static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj) } } -static void -drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) +/** + * drm_gem_object_handle_put_unlocked - releases reference on user-space handle + * @obj: GEM object + * + * Releases a reference on the GEM buffer object's handle. Possibly releases + * the GEM buffer object and associated dma-buf objects. + */ +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; bool final = false; - if (WARN_ON(READ_ONCE(obj->handle_count) == 0)) + if (drm_WARN_ON(dev, READ_ONCE(obj->handle_count) == 0)) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ + * Must bump handle count first as this may be the last + * ref, in which case the object would disappear before + * we checked for a name. + */ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { @@ -279,6 +325,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_file *file_priv = data; struct drm_gem_object *obj = ptr; + if (drm_WARN_ON(obj->dev, !data)) + return 0; + if (obj->funcs->close) obj->funcs->close(obj, file_priv); @@ -389,8 +438,8 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, int ret; WARN_ON(!mutex_is_locked(&dev->object_name_lock)); - if (obj->handle_count++ == 0) - drm_gem_object_get(obj); + + drm_gem_object_handle_get(obj); /* * Get the user-visible handle using idr. 
Preload and perform @@ -399,7 +448,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, idr_preload(GFP_KERNEL); spin_lock(&file_priv->table_lock); - ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT); + ret = idr_alloc(&file_priv->object_idr, NULL, 1, 0, GFP_NOWAIT); spin_unlock(&file_priv->table_lock); idr_preload_end(); @@ -420,6 +469,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, goto err_revoke; } + /* mirrors drm_gem_handle_delete to avoid races */ + spin_lock(&file_priv->table_lock); + obj = idr_replace(&file_priv->object_idr, obj, handle); + WARN_ON(obj != NULL); + spin_unlock(&file_priv->table_lock); *handlep = handle; return 0; diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index e44f28fd81d3..60c282881958 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -161,6 +161,8 @@ void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ int drm_gem_init(struct drm_device *dev); +bool drm_gem_object_handle_get_if_exists_unlocked(struct drm_gem_object *obj); +void drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj); int drm_gem_handle_create_tail(struct drm_file *file_priv, struct drm_gem_object *obj, u32 *handlep); diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index e5184a0c2465..21fd647f8ce1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -91,12 +91,13 @@ static const struct dev_pm_ops mipi_dsi_device_pm_ops = { .restore = pm_generic_restore, }; -static const struct bus_type mipi_dsi_bus_type = { +const struct bus_type mipi_dsi_bus_type = { .name = "mipi-dsi", .match = mipi_dsi_device_match, .uevent = mipi_dsi_uevent, .pm = &mipi_dsi_device_pm_ops, }; +EXPORT_SYMBOL_GPL(mipi_dsi_bus_type); /** * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index dd55b1cb764d..18492daae4b3 
100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -27,7 +27,7 @@ //! * <https://github.com/erwanvivien/fast_qr> //! * <https://github.com/bjguillot/qr> -use kernel::{prelude::*, str::CStr}; +use kernel::prelude::*; #[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] struct Version(usize); diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index edbeab88ff2b..d983ee85cf13 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -343,17 +343,18 @@ EXPORT_SYMBOL(drm_writeback_connector_init_with_encoder); /** * drm_writeback_connector_cleanup - Cleanup the writeback connector * @dev: DRM device - * @wb_connector: Pointer to the writeback connector to clean up + * @data: Pointer to the writeback connector to clean up * * This will decrement the reference counter of blobs and destroy properties. It * will also clean the remaining jobs in this writeback connector. Caution: This helper will not * clean up the attached encoder and the drm_connector. 
*/ static void drm_writeback_connector_cleanup(struct drm_device *dev, - struct drm_writeback_connector *wb_connector) + void *data) { unsigned long flags; struct drm_writeback_job *pos, *n; + struct drm_writeback_connector *wb_connector = data; delete_writeback_properties(dev); drm_property_blob_put(wb_connector->pixel_formats_blob_ptr); @@ -405,7 +406,7 @@ int drmm_writeback_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drmm_add_action_or_reset(dev, (void *)drm_writeback_connector_cleanup, + ret = drmm_add_action_or_reset(dev, drm_writeback_connector_cleanup, wb_connector); if (ret) return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8..71e2e6b9d713 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -35,6 +35,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); + struct drm_gpu_scheduler *sched = sched_job->sched; struct etnaviv_gpu *gpu = submit->gpu; u32 dma_addr, primid = 0; int change; @@ -89,7 +90,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job return DRM_GPU_SCHED_STAT_NOMINAL; out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f91daefa9d2b..805aa28c1723 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -636,6 +636,10 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) if (!ctx->drm_dev) goto out; + /* check if crtc and vblank have been initialized properly */ + if (!drm_dev_has_vblank(ctx->drm_dev)) + goto 
out; + if (!ctx->i80_if) { drm_crtc_handle_vblank(&ctx->crtc->base); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index c394cc702d7d..205c238cc73a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -187,6 +187,7 @@ struct fimd_context { u32 i80ifcon; bool i80_if; bool suspended; + bool dp_clk_enabled; wait_queue_head_t wait_vsync_queue; atomic_t wait_vsync_event; atomic_t win_updated; @@ -1047,7 +1048,18 @@ static void fimd_dp_clock_enable(struct exynos_drm_clk *clk, bool enable) struct fimd_context *ctx = container_of(clk, struct fimd_context, dp_clk); u32 val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; + + if (enable == ctx->dp_clk_enabled) + return; + + if (enable) + pm_runtime_resume_and_get(ctx->dev); + + ctx->dp_clk_enabled = enable; writel(val, ctx->regs + DP_MIE_CLKCON); + + if (!enable) + pm_runtime_put(ctx->dev); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4787fee4696f..d44401a695e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -174,7 +174,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %pK\n", obj->filp); + DRM_DEV_DEBUG_KMS(dev->dev, "created file object = %p\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index ea9f66037600..03c8490af4f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -271,7 +271,7 @@ static inline struct exynos_drm_ipp_task * task->src.rect.h = task->dst.rect.h = UINT_MAX; task->transform.rotation = DRM_MODE_ROTATE_0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %pK\n", task); + 
DRM_DEV_DEBUG_DRIVER(task->dev, "Allocated task %p\n", task); return task; } @@ -339,7 +339,7 @@ static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, } DRM_DEV_DEBUG_DRIVER(task->dev, - "Got task %pK configuration from userspace\n", + "Got task %p configuration from userspace\n", task); return 0; } @@ -394,7 +394,7 @@ static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, struct exynos_drm_ipp_task *task) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Freeing task %p\n", task); exynos_drm_ipp_task_release_buf(&task->src); exynos_drm_ipp_task_release_buf(&task->dst); @@ -559,7 +559,7 @@ static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, DRM_EXYNOS_IPP_FORMAT_DESTINATION); if (!fmt) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: %s format not supported\n", + "Task %p: %s format not supported\n", task, buf == src ? 
"src" : "dst"); return -EINVAL; } @@ -609,7 +609,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) bool rotate = (rotation != DRM_MODE_ROTATE_0); bool scale = false; - DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %pK\n", task); + DRM_DEV_DEBUG_DRIVER(task->dev, "Checking task %p\n", task); if (src->rect.w == UINT_MAX) src->rect.w = src->buf.width; @@ -625,7 +625,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) dst->rect.x + dst->rect.w > (dst->buf.width) || dst->rect.y + dst->rect.h > (dst->buf.height)) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: defined area is outside provided buffers\n", + "Task %p: defined area is outside provided buffers\n", task); return -EINVAL; } @@ -642,7 +642,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && src->buf.fourcc != dst->buf.fourcc)) { - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: hw capabilities exceeded\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: hw capabilities exceeded\n", task); return -EINVAL; } @@ -655,7 +655,7 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) if (ret) return ret; - DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %pK: all checks done.\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "Task %p: all checks done.\n", task); return ret; @@ -667,25 +667,25 @@ static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; int ret = 0; - DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %pK\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Setting buffer for task %p\n", task); ret = exynos_drm_ipp_task_setup_buffer(src, filp); if (ret) { DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: src buffer setup failed\n", + "Task %p: src buffer setup failed\n", task); return ret; } ret = exynos_drm_ipp_task_setup_buffer(dst, filp); if (ret) { 
DRM_DEV_DEBUG_DRIVER(task->dev, - "Task %pK: dst buffer setup failed\n", + "Task %p: dst buffer setup failed\n", task); return ret; } - DRM_DEV_DEBUG_DRIVER(task->dev, "Task %pK: buffers prepared.\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "Task %p: buffers prepared.\n", task); return ret; @@ -764,7 +764,7 @@ void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) struct exynos_drm_ipp *ipp = task->ipp; unsigned long flags; - DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %pK done: %d\n", + DRM_DEV_DEBUG_DRIVER(task->dev, "ipp: %d, task %p done: %d\n", ipp->id, task, ret); spin_lock_irqsave(&ipp->lock, flags); @@ -807,7 +807,7 @@ static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) spin_unlock_irqrestore(&ipp->lock, flags); DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, selected task %pK to run\n", ipp->id, + "ipp: %d, selected task %p to run\n", ipp->id, task); ret = ipp->funcs->commit(ipp, task); @@ -917,14 +917,14 @@ int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, */ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { DRM_DEV_DEBUG_DRIVER(ipp->dev, - "ipp: %d, nonblocking processing task %pK\n", + "ipp: %d, nonblocking processing task %p\n", ipp->id, task); task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; exynos_drm_ipp_schedule_task(task->ipp, task); ret = 0; } else { - DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %pK\n", + DRM_DEV_DEBUG_DRIVER(ipp->dev, "ipp: %d, processing task %p\n", ipp->id, task); exynos_drm_ipp_schedule_task(ipp, task); ret = wait_event_interruptible(ipp->done_wq, diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index ba7b8938b17c..166ee11831ab 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1938,7 +1938,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, int index, len; if (drm_WARN_ON(display->drm, - !data || panel->vbt.dsi.seq_version != 1)) + !data || 
panel->vbt.dsi.seq_version >= 3)) return 0; /* index = 1 to skip sequence byte */ @@ -1961,7 +1961,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, } /* - * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence. * The deassert must be done before calling intel_dsi_device_ready, so for * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. @@ -1972,9 +1972,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display, u8 *init_otp; int len; - /* Limit this to v1 vid-mode sequences */ + /* Limit this to v1/v2 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || - panel->vbt.dsi.seq_version != 1) + panel->vbt.dsi.seq_version >= 3) return; /* Only do this if there are otp and assert seqs and no deassert seq */ diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index 74bb3bedf30f..5111bdc3075b 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -103,8 +103,8 @@ static void get_ana_cp_int_prop(u64 vco_clk, DIV_ROUND_DOWN_ULL(curve_1_interpolated, CURVE0_MULTIPLIER))); ana_cp_int_temp = - DIV_ROUND_CLOSEST_ULL(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), - CURVE2_MULTIPLIER); + DIV64_U64_ROUND_CLOSEST(DIV_ROUND_DOWN_ULL(adjusted_vco_clk1, curve_2_scaled1), + CURVE2_MULTIPLIER); *ana_cp_int = max(1, min(ana_cp_int_temp, 127)); diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 346737f15fa9..2007bb9d974d 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1056,7 +1056,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, BXT_MIPI_TRANS_VACTIVE(port)); adjusted_mode->crtc_vtotal = intel_de_read(display, - 
BXT_MIPI_TRANS_VTOTAL(port)); + BXT_MIPI_TRANS_VTOTAL(port)) + 1; hactive = adjusted_mode->crtc_hdisplay; hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port)); @@ -1260,7 +1260,7 @@ static void set_dsi_timings(struct intel_encoder *encoder, intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port), adjusted_mode->crtc_vdisplay); intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port), - adjusted_mode->crtc_vtotal); + adjusted_mode->crtc_vtotal - 1); } intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port), @@ -1589,8 +1589,8 @@ static void vlv_dsi_add_properties(struct intel_connector *connector) static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) { + struct intel_display *display = to_intel_display(&intel_dsi->base); struct intel_connector *connector = intel_dsi->attached_connector; - struct intel_display *display = to_intel_display(connector); struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; u32 tlpx_ns, extra_byte_count, tlpx_ui; u32 ui_num, ui_den; diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index 1e925c75fb08..c43febc862dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -284,7 +284,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) if (gt->gsc.intf[intf_id].irq < 0) return; - ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); + ret = generic_handle_irq_safe(gt->gsc.intf[intf_id].irq); if (ret) gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a876a34455f1..2a6d79abf25b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -610,7 +610,6 @@ static int ring_context_alloc(struct intel_context *ce) /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; - ce->timeline = 
intel_timeline_get(engine->legacy.timeline); GEM_BUG_ON(ce->state); if (engine->context_size) { @@ -623,6 +622,8 @@ static int ring_context_alloc(struct intel_context *ce) ce->state = vma; } + ce->timeline = intel_timeline_get(engine->legacy.timeline); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e5a188ce3185..5bc696bfbb0f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -108,11 +108,11 @@ static unsigned int config_bit(const u64 config) return other_bit(config); } -static u32 config_mask(const u64 config) +static __always_inline u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -121,7 +121,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 88870844b5bd..2fb7a9e7efec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -73,8 +73,8 @@ static int igt_add_request(void *arg) /* Basic preliminary test to create a request and let it loose! 
*/ request = mock_request(rcs0(i915)->kernel_context, HZ / 10); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_add(request); @@ -91,8 +91,8 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); i915_request_get(request); @@ -160,8 +160,8 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ request = mock_request(rcs0(i915)->kernel_context, T); - if (!request) - return -ENOMEM; + if (IS_ERR(request)) + return PTR_ERR(request); if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); @@ -219,8 +219,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); intel_context_put(ce); - if (!request) { - err = -ENOMEM; + if (IS_ERR(request)) { + err = PTR_ERR(request); goto err_context_0; } @@ -237,8 +237,8 @@ static int igt_request_rewind(void *arg) GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); intel_context_put(ce); - if (!vip) { - err = -ENOMEM; + if (IS_ERR(vip)) { + err = PTR_ERR(vip); goto err_context_1; } diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index 09f747228dff..1b0cf073e964 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -35,7 +35,7 @@ mock_request(struct intel_context *ce, unsigned long delay) /* NB the i915->requests slab cache is enlarged to fit mock_request */ request = intel_context_create_request(ce); if (IS_ERR(request)) - return NULL; + return request; request->mock.delay = delay; return request; diff --git a/drivers/gpu/drm/imagination/pvr_power.c b/drivers/gpu/drm/imagination/pvr_power.c index 41f5d89e78b8..3e349d039fc0 100644 --- 
a/drivers/gpu/drm/imagination/pvr_power.c +++ b/drivers/gpu/drm/imagination/pvr_power.c @@ -386,13 +386,13 @@ pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset) if (!err) { if (hard_reset) { pvr_dev->fw_dev.booted = false; - WARN_ON(pm_runtime_force_suspend(from_pvr_device(pvr_dev)->dev)); + WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev)); err = pvr_fw_hard_reset(pvr_dev); if (err) goto err_device_lost; - err = pm_runtime_force_resume(from_pvr_device(pvr_dev)->dev); + err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev); pvr_dev->fw_dev.booted = true; if (err) goto err_device_lost; diff --git a/drivers/gpu/drm/mgag200/mgag200_ddc.c b/drivers/gpu/drm/mgag200/mgag200_ddc.c index 6d81ea8931e8..c31673eaa554 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ddc.c +++ b/drivers/gpu/drm/mgag200/mgag200_ddc.c @@ -26,7 +26,6 @@ * Authors: Dave Airlie <airlied@redhat.com> */ -#include <linux/export.h> #include <linux/i2c-algo-bit.h> #include <linux/i2c.h> #include <linux/pci.h> diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index 39641551eeb6..4280f71e472a 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c @@ -71,10 +71,6 @@ static int a2xx_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len) return 0; } -static void a2xx_gpummu_resume_translation(struct msm_mmu *mmu) -{ -} - static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); @@ -90,7 +86,6 @@ static const struct msm_mmu_funcs funcs = { .map = a2xx_gpummu_map, .unmap = a2xx_gpummu_unmap, .destroy = a2xx_gpummu_destroy, - .resume_translation = a2xx_gpummu_resume_translation, }; struct msm_mmu *a2xx_gpummu_new(struct device *dev, struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 650e5bac225f..60aef0796236 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -131,6 +131,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { ring->cur_ctx_seqno = 0; a5xx_submit_in_rb(gpu, submit); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index bf3758f010f4..491fde0083a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -130,6 +130,20 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); } if (!sysprof) { @@ -212,6 +226,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -335,6 +351,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index f5e1490d07c1..16e7ac444efd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -137,9 +137,8 @@ err_disable_rpm: return NULL; } -static int find_chipid(struct device *dev, uint32_t *chipid) +static int find_chipid(struct device_node *node, uint32_t *chipid) { - struct device_node *node = dev->of_node; const char *compat; int ret; @@ -173,15 +172,36 @@ static int find_chipid(struct device *dev, uint32_t *chipid) /* and if that fails, fall back to legacy "qcom,chipid" property: */ ret = of_property_read_u32(node, "qcom,chipid", chipid); if (ret) { - DRM_DEV_ERROR(dev, "could not parse qcom,chipid: %d\n", ret); + DRM_ERROR("%pOF: could not parse qcom,chipid: %d\n", + node, ret); return ret; } - dev_warn(dev, "Using legacy qcom,chipid binding!\n"); + pr_warn("%pOF: Using legacy qcom,chipid binding!\n", node); return 0; } +bool adreno_has_gpu(struct device_node *node) +{ + const struct adreno_info *info; + uint32_t chip_id; + int ret; + + ret = find_chipid(node, &chip_id); + if (ret) + return false; + + info = adreno_info(chip_id); + if (!info) { + pr_warn("%pOF: Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", + node, ADRENO_CHIPID_ARGS(chip_id)); + return false; + } + + return true; +} + static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; @@ -191,19 +211,18 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) struct msm_gpu *gpu; int ret; - ret = find_chipid(dev, &config.chip_id); - if (ret) + ret = find_chipid(dev->of_node, &config.chip_id); + /* We shouldn't have gotten this far if we can't parse the chip_id */ + if (WARN_ON(ret)) return ret; dev->platform_data = &config; priv->gpu_pdev = to_platform_device(dev); info = adreno_info(config.chip_id); - if (!info) { - dev_warn(drm->dev, "Unknown GPU 
revision: %"ADRENO_CHIPID_FMT"\n", - ADRENO_CHIPID_ARGS(config.chip_id)); + /* We shouldn't have gotten this far if we don't recognize the GPU: */ + if (WARN_ON(!info)) return -ENXIO; - } config.info = info; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2348ffb35f7e..86bff915c3e7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -259,24 +259,54 @@ u64 adreno_private_address_space_size(struct msm_gpu *gpu) return BIT(ttbr1_cfg->ias) - ADRENO_VM_START; } +void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + struct msm_drm_private *priv = gpu->dev->dev_private; + unsigned long flags; + + /* + * Wait until the cooldown period has passed and we would actually + * collect a crashdump to re-enable stall-on-fault. + */ + spin_lock_irqsave(&priv->fault_stall_lock, flags); + if (!priv->stall_enabled && + ktime_after(ktime_get(), priv->stall_reenable_time) && + !READ_ONCE(gpu->crashstate)) { + priv->stall_enabled = true; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true); + } + spin_unlock_irqrestore(&priv->fault_stall_lock, flags); +} + #define ARM_SMMU_FSR_TF BIT(1) #define ARM_SMMU_FSR_PF BIT(3) #define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_SS BIT(30) int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]) { + struct msm_drm_private *priv = gpu->dev->dev_private; const char *type = "UNKNOWN"; - bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); + bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) && + !READ_ONCE(gpu->crashstate); + unsigned long irq_flags; /* - * If we aren't going to be resuming later from fault_worker, then do - * it now. + * In case there is a subsequent storm of pagefaults, disable + * stall-on-fault for at least half a second. 
*/ - if (!do_devcoredump) { - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + if (priv->stall_enabled) { + priv->stall_enabled = false; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false); } + priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500); + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); /* * Print a default message if we couldn't get the data from the @@ -304,16 +334,18 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, scratch[0], scratch[1], scratch[2], scratch[3]); if (do_devcoredump) { + struct msm_gpu_fault_info fault_info = {}; + /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); - gpu->fault_info.ttbr0 = info->ttbr0; - gpu->fault_info.iova = iova; - gpu->fault_info.flags = flags; - gpu->fault_info.type = type; - gpu->fault_info.block = block; + fault_info.ttbr0 = info->ttbr0; + fault_info.iova = iova; + fault_info.flags = flags; + fault_info.type = type; + fault_info.block = block; - kthread_queue_work(gpu->worker, &gpu->fault_work); + msm_gpu_fault_crashstate_capture(gpu, &fault_info); } return 0; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index a8f4bf416e64..bc063594a359 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -636,6 +636,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]); +void adreno_check_and_reenable_stall(struct adreno_gpu *gpu); + int adreno_read_speedbin(struct device *dev, u32 *speedbin); /* diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 8a618841e3ea..1c468ca5d692 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -94,17 +94,21 @@ static void drm_mode_to_intf_timing_params( timing->vsync_polarity = 0; } - /* for DP/EDP, Shift timings to align it to bottom right */ - if (phys_enc->hw_intf->cap->type == INTF_DP) { + timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); + + /* + * For DP/EDP, Shift timings to align it to bottom right. + * wide_bus_en is set for everything excluding SDM845 & + * porch changes cause DisplayPort failure and HDMI tearing. + */ + if (phys_enc->hw_intf->cap->type == INTF_DP && timing->wide_bus_en) { timing->h_back_porch += timing->h_front_porch; timing->h_front_porch = 0; timing->v_back_porch += timing->v_front_porch; timing->v_front_porch = 0; } - timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); - timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); - /* * for DP, divide the horizonal parameters by 2 when * widebus is enabled diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 386c4669c831..a48e6db4f156 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -128,6 +128,11 @@ static const struct msm_dp_desc msm_dp_desc_sa8775p[] = { {} }; +static const struct msm_dp_desc msm_dp_desc_sdm845[] = { + { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0 }, + {} +}; + static const struct msm_dp_desc msm_dp_desc_sc7180[] = { { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true }, {} @@ -180,7 +185,7 @@ static const struct of_device_id msm_dp_dt_match[] = { { .compatible = "qcom,sc8180x-edp", .data = &msm_dp_desc_sc8180x }, { .compatible = "qcom,sc8280xp-dp", .data = &msm_dp_desc_sc8280xp }, { .compatible = "qcom,sc8280xp-edp", .data = &msm_dp_desc_sc8280xp }, - { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sc7180 }, + { .compatible = "qcom,sdm845-dp", .data 
= &msm_dp_desc_sdm845 }, { .compatible = "qcom,sm8350-dp", .data = &msm_dp_desc_sc7180 }, { .compatible = "qcom,sm8650-dp", .data = &msm_dp_desc_sm8650 }, { .compatible = "qcom,x1e80100-dp", .data = &msm_dp_desc_x1e80100 }, diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 9812b4d69197..af2e30f3f842 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -704,6 +704,13 @@ static int dsi_pll_10nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_10nm_pll_restore_state(). + */ + if (!dsi_pll_10nm_vco_recalc_rate(&pll_10nm->clk_hw, VCO_REF_CLK_RATE)) + pll_10nm->vco_current_rate = pll_10nm->phy->cfg->min_pll_rate; + return 0; } diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 7ab607252d18..6af72162cda4 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -208,6 +208,35 @@ DEFINE_DEBUGFS_ATTRIBUTE(shrink_fops, shrink_get, shrink_set, "0x%08llx\n"); +/* + * Return the number of microseconds to wait until stall-on-fault is + * re-enabled. If 0 then it is already enabled or will be re-enabled on the + * next submit (unless there's a leftover devcoredump). This is useful for + * kernel tests that intentionally produce a fault and check the devcoredump to + * wait until the cooldown period is over. 
+ */ + +static int +stall_reenable_time_get(void *data, u64 *val) +{ + struct msm_drm_private *priv = data; + unsigned long irq_flags; + + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + + if (priv->stall_enabled) + *val = 0; + else + *val = max(ktime_us_delta(priv->stall_reenable_time, ktime_get()), 0); + + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(stall_reenable_time_fops, + stall_reenable_time_get, NULL, + "%lld\n"); static int msm_gem_show(struct seq_file *m, void *arg) { @@ -319,6 +348,9 @@ static void msm_debugfs_gpu_init(struct drm_minor *minor) debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root, &priv->disable_err_irq); + debugfs_create_file("stall_reenable_time_us", 0400, minor->debugfs_root, + priv, &stall_reenable_time_fops); + gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root); debugfs_create_bool("idle_clamp",0600, gpu_devfreq, diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index f316e6776f67..d007687c2446 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -245,6 +245,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock); drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock); + /* Initialize stall-on-fault */ + spin_lock_init(&priv->fault_stall_lock); + priv->stall_enabled = true; + /* Teach lockdep about lock ordering wrt. shrinker: */ fs_reclaim_acquire(GFP_KERNEL); might_lock(&priv->lru.lock); @@ -926,7 +930,7 @@ static const struct drm_driver msm_driver = { * is no external component that we need to add since LVDS is within MDP4 * itself. 
*/ -static int add_components_mdp(struct device *master_dev, +static int add_mdp_components(struct device *master_dev, struct component_match **matchptr) { struct device_node *np = master_dev->of_node; @@ -1030,7 +1034,7 @@ static int add_gpu_components(struct device *dev, if (!np) return 0; - if (of_device_is_available(np)) + if (of_device_is_available(np) && adreno_has_gpu(np)) drm_of_component_match_add(dev, matchptr, component_compare_of, np); of_node_put(np); @@ -1071,7 +1075,7 @@ int msm_drv_probe(struct device *master_dev, /* Add mdp components if we have KMS. */ if (kms_init) { - ret = add_components_mdp(master_dev, &match); + ret = add_mdp_components(master_dev, &match); if (ret) return ret; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index a65077855201..c8afb1ea6040 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -222,6 +222,29 @@ struct msm_drm_private { * the sw hangcheck mechanism. */ bool disable_err_irq; + + /** + * @fault_stall_lock: + * + * Serialize changes to stall-on-fault state. + */ + spinlock_t fault_stall_lock; + + /** + * @fault_stall_reenable_time: + * + * If stall_enabled is false, when to reenable stall-on-fault. + * Protected by @fault_stall_lock. + */ + ktime_t stall_reenable_time; + + /** + * @stall_enabled: + * + * Whether stall-on-fault is currently enabled. Protected by + * @fault_stall_lock. 
+ */ + bool stall_enabled; }; const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier); diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3e9aa2cc38ef..d4f71bb54e84 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref) container_of(kref, struct msm_gem_submit, ref); unsigned i; + /* + * In error paths, we could unref the submit without calling + * drm_sched_entity_push_job(), so msm_job_free() will never + * get called. Since drm_sched_job_cleanup() will NULL out + * s_fence, we can use that to detect this case. + */ + if (submit->base.s_fence) + drm_sched_job_cleanup(&submit->base); + if (submit->fence_id) { spin_lock(&submit->queue->idr_lock); idr_remove(&submit->queue->fence_idr, submit->fence_id); @@ -649,6 +658,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; + struct sync_file *sync_file = NULL; int out_fence_fd = -1; unsigned i; int ret; @@ -858,7 +868,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - struct sync_file *sync_file = sync_file_create(submit->user_fence); + sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; } else { @@ -892,8 +902,11 @@ out: out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) + if (ret && (out_fence_fd >= 0)) { put_unused_fd(out_fence_fd); + if (sync_file) + fput(sync_file->file); + } if (!IS_ERR_OR_NULL(submit)) { msm_gem_submit_put(submit); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 197871fdf508..3947f7ba1421 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -257,7 +257,8 @@ out: } static 
void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { struct msm_gpu_state *state; @@ -276,7 +277,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Fill in the additional crash state information */ state->comm = kstrdup(comm, GFP_KERNEL); state->cmd = kstrdup(cmd, GFP_KERNEL); - state->fault_info = gpu->fault_info; + if (fault_info) + state->fault_info = *fault_info; if (submit) { int i; @@ -308,7 +310,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, } #else static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { } #endif @@ -405,7 +408,7 @@ static void recover_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, NULL, comm, cmd); kfree(cmd); kfree(comm); @@ -459,9 +462,8 @@ out_unlock: msm_gpu_retire(gpu); } -static void fault_worker(struct kthread_work *work) +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info) { - struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work); struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd = NULL; @@ -484,16 +486,13 @@ static void fault_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, fault_info, comm, cmd); pm_runtime_put_sync(&gpu->pdev->dev); kfree(cmd); kfree(comm); resume_smmu: - memset(&gpu->fault_info, 0, sizeof(gpu->fault_info)); - 
gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); - mutex_unlock(&gpu->lock); } @@ -882,7 +881,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); - kthread_init_work(&gpu->fault_work, fault_worker); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index e25009150579..5bf7cd985b9c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -253,12 +253,6 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3 struct timer_list hangcheck_timer; - /* Fault info for most recent iova fault: */ - struct msm_gpu_fault_info fault_info; - - /* work for handling GPU ioval faults: */ - struct kthread_work fault_work; - /* work for handling GPU recovery: */ struct kthread_work recover_work; @@ -668,6 +662,7 @@ msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *ta void msm_gpu_cleanup(struct msm_gpu *gpu); struct msm_gpu *adreno_load_gpu(struct drm_device *dev); +bool adreno_has_gpu(struct device_node *node); void __init adreno_register(void); void __exit adreno_unregister(void); @@ -705,6 +700,8 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) mutex_unlock(&gpu->lock); } +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info); + /* * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can * support expanded privileges diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index fd73dcd3f30e..739ce2c283a4 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -345,7 +345,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev unsigned long iova, int flags, void *arg) { struct msm_iommu *iommu = 
arg; - struct msm_mmu *mmu = &iommu->base; struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev); struct adreno_smmu_fault_info info, *ptr = NULL; @@ -359,9 +358,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags); - if (mmu->funcs->resume_translation) - mmu->funcs->resume_translation(mmu); - return 0; } @@ -376,12 +372,12 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de return -ENOSYS; } -static void msm_iommu_resume_translation(struct msm_mmu *mmu) +static void msm_iommu_set_stall(struct msm_mmu *mmu, bool enable) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev); - if (adreno_smmu->resume_translation) - adreno_smmu->resume_translation(adreno_smmu->cookie, true); + if (adreno_smmu->set_stall) + adreno_smmu->set_stall(adreno_smmu->cookie, enable); } static void msm_iommu_detach(struct msm_mmu *mmu) @@ -431,7 +427,7 @@ static const struct msm_mmu_funcs funcs = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, - .resume_translation = msm_iommu_resume_translation, + .set_stall = msm_iommu_set_stall, }; struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index daf91529e02b..0c694907140d 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -15,7 +15,7 @@ struct msm_mmu_funcs { size_t len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); void (*destroy)(struct msm_mmu *mmu); - void (*resume_translation)(struct msm_mmu *mmu); + void (*set_stall)(struct msm_mmu *mmu, bool enable); }; enum msm_mmu_type { diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 5a6ae9fc3194..462713401622 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ 
b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2255,7 +2255,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <reg32 offset="0" name="0"> <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/> <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/> - <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/> + <bitfield name="CLEAR_BV_BR_COUNTER" pos="2" type="boolean"/> + <bitfield name="RESET_GLOBAL_LOCAL_TS" pos="3" type="boolean"/> </reg32> </domain> diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index 3926485bb197..a409404627c7 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,6 +11,7 @@ import collections import argparse import time import datetime +import re class Error(Exception): def __init__(self, message): @@ -877,13 +878,14 @@ The rules-ng-ng source files this header was generated from are: """) maxlen = 0 for filepath in p.xml_files: - maxlen = max(maxlen, len(filepath)) + new_filepath = re.sub("^.+drivers","drivers",filepath) + maxlen = max(maxlen, len(new_filepath)) for filepath in p.xml_files: - pad = " " * (maxlen - len(filepath)) + pad = " " * (maxlen - len(new_filepath)) filesize = str(os.path.getsize(filepath)) filesize = " " * (7 - len(filesize)) + filesize filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + filepath + pad + " (" + filesize + " bytes, from " + filetime + ")") + print("- " + new_filepath + pad + " (" + filesize + " bytes, from <stripped>)") if p.copyright_year: current_year = str(datetime.date.today().year) print() diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index d47442125fa1..9aae26eb7d8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,7 +42,7 @@ #include "nouveau_acpi.h" static struct ida bl_ida; -#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 
1 for '\0' +#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0' static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 200e65a7cefc..c7869a639bef 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -314,14 +314,10 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) drm->debugfs = NULL; } -int +void nouveau_module_debugfs_init(void) { nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); - if (IS_ERR(nouveau_debugfs_root)) - return PTR_ERR(nouveau_debugfs_root); - - return 0; } void diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index b7617b344ee2..d05ed0e641c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -24,7 +24,7 @@ extern void nouveau_debugfs_fini(struct nouveau_drm *); extern struct dentry *nouveau_debugfs_root; -int nouveau_module_debugfs_init(void); +void nouveau_module_debugfs_init(void); void nouveau_module_debugfs_fini(void); #else static inline void @@ -42,10 +42,9 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) { } -static inline int +static inline void nouveau_module_debugfs_init(void) { - return 0; } static inline void diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 0c82a63cd49d..1527b801f013 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1461,9 +1461,7 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; - ret = nouveau_module_debugfs_init(); - if (ret) - return ret; + nouveau_module_debugfs_init(); #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_register(&nouveau_platform_driver); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c index baf42339f93e..588cb4ab85cb 
100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/gsp.c @@ -719,7 +719,6 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = 4, - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; caps->status = 0xffff; @@ -727,17 +726,22 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!acpi_check_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, BIT_ULL(0x1a))) return; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &NVOP_DSM_GUID, NVOP_DSM_REV, 0x1a, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -754,24 +758,28 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, .buffer.length = sizeof(caps), - .buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL), }, *obj; jt->status = 0xffff; + argv4.buffer.pointer = kmalloc(argv4.buffer.length, GFP_KERNEL); + if (!argv4.buffer.pointer) + return; + obj = acpi_evaluate_dsm(handle, &JT_DSM_GUID, JT_DSM_REV, 0x1, &argv4); if (!obj) - return; + goto done; if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) - return; + goto done; jt->status = 0; jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; +done: ACPI_FREE(obj); kfree(argv4.buffer.pointer); @@ -1744,6 +1752,13 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt); return ret; } + + /* + * TODO: Debug the GSP firmware / RPC handling to find out why + * without this Turing (but none of the 
other architectures) + * ends up resetting all channels after resume. + */ + msleep(50); } ret = r535_gsp_rpc_unloading_guest_driver(gsp, suspend); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 5acb98d137bd..9d06ff722fea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -637,12 +637,18 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, if (payload_size > max_payload_size) { const u32 fn = rpc->function; u32 remain_payload_size = payload_size; + void *next; - /* Adjust length, and send initial RPC. */ - rpc->length = sizeof(*rpc) + max_payload_size; - msg->checksum = rpc->length; + /* Send initial RPC. */ + next = r535_gsp_rpc_get(gsp, fn, max_payload_size); + if (IS_ERR(next)) { + repv = next; + goto done; + } - repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0); + memcpy(next, payload, max_payload_size); + + repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0); if (IS_ERR(repv)) goto done; @@ -653,7 +659,6 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, while (remain_payload_size) { u32 size = min(remain_payload_size, max_payload_size); - void *next; next = r535_gsp_rpc_get(gsp, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size); if (IS_ERR(next)) { @@ -674,6 +679,8 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, /* Wait for reply. 
*/ repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size + sizeof(*rpc)); + if (!IS_ERR(repv)) + kvfree(msg); } else { repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c index 52f2e5f14517..f25ea610cd99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c @@ -121,7 +121,7 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external) page_shift -= desc->bits; ctrl->levels[i].physAddress = pd->pt[0]->addr; - ctrl->levels[i].size = (1 << desc->bits) * desc->size; + ctrl->levels[i].size = BIT_ULL(desc->bits) * desc->size; ctrl->levels[i].aperture = 1; ctrl->levels[i].pageShift = page_shift; diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 0a3b26bb4d73..9f81fa960b46 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -26,6 +26,7 @@ #include <linux/i2c.h> #include <linux/media-bus-format.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -136,6 +137,14 @@ struct panel_desc { int connector_type; }; +struct panel_desc_dsi { + struct panel_desc desc; + + unsigned long flags; + enum mipi_dsi_pixel_format format; + unsigned int lanes; +}; + struct panel_simple { struct drm_panel base; @@ -430,10 +439,7 @@ static const struct drm_panel_funcs panel_simple_funcs = { .get_timings = panel_simple_get_timings, }; -static struct panel_desc panel_dpi; - -static int panel_dpi_probe(struct device *dev, - struct panel_simple *panel) +static struct panel_desc *panel_dpi_probe(struct device *dev) { struct display_timing *timing; const struct device_node *np; @@ -445,17 +451,17 @@ static int panel_dpi_probe(struct device *dev, np = dev->of_node; desc = devm_kzalloc(dev, 
sizeof(*desc), GFP_KERNEL); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); timing = devm_kzalloc(dev, sizeof(*timing), GFP_KERNEL); if (!timing) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ret = of_get_display_timing(np, "panel-timing", timing); if (ret < 0) { dev_err(dev, "%pOF: no panel-timing node found for \"panel-dpi\" binding\n", np); - return ret; + return ERR_PTR(ret); } desc->timings = timing; @@ -473,9 +479,7 @@ static int panel_dpi_probe(struct device *dev, /* We do not know the connector for the DT node, so guess it */ desc->connector_type = DRM_MODE_CONNECTOR_DPI; - panel->desc = desc; - - return 0; + return desc; } #define PANEL_SIMPLE_BOUNDS_CHECK(to_check, bounds, field) \ @@ -570,8 +574,44 @@ static int panel_simple_override_nondefault_lvds_datamapping(struct device *dev, return 0; } -static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) +static const struct panel_desc *panel_simple_get_desc(struct device *dev) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI) && + dev_is_mipi_dsi(dev)) { + const struct panel_desc_dsi *dsi_desc; + + dsi_desc = of_device_get_match_data(dev); + if (!dsi_desc) + return ERR_PTR(-ENODEV); + + return &dsi_desc->desc; + } + + if (dev_is_platform(dev)) { + const struct panel_desc *desc; + + desc = of_device_get_match_data(dev); + if (!desc) { + /* + * panel-dpi probes without a descriptor and + * panel_dpi_probe() will initialize one for us + * based on the device tree. 
+ */ + if (of_device_is_compatible(dev->of_node, "panel-dpi")) + return panel_dpi_probe(dev); + else + return ERR_PTR(-ENODEV); + } + + return desc; + } + + return ERR_PTR(-ENODEV); +} + +static struct panel_simple *panel_simple_probe(struct device *dev) +{ + const struct panel_desc *desc; struct panel_simple *panel; struct display_timing dt; struct device_node *ddc; @@ -579,27 +619,31 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) u32 bus_flags; int err; + desc = panel_simple_get_desc(dev); + if (IS_ERR(desc)) + return ERR_CAST(desc); + panel = devm_drm_panel_alloc(dev, struct panel_simple, base, &panel_simple_funcs, desc->connector_type); if (IS_ERR(panel)) - return PTR_ERR(panel); + return ERR_CAST(panel); panel->desc = desc; panel->supply = devm_regulator_get(dev, "power"); if (IS_ERR(panel->supply)) - return PTR_ERR(panel->supply); + return ERR_CAST(panel->supply); panel->enable_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(panel->enable_gpio)) - return dev_err_probe(dev, PTR_ERR(panel->enable_gpio), - "failed to request GPIO\n"); + return dev_err_cast_probe(dev, panel->enable_gpio, + "failed to request GPIO\n"); err = of_drm_get_panel_orientation(dev->of_node, &panel->orientation); if (err) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); - return err; + return ERR_PTR(err); } ddc = of_parse_phandle(dev->of_node, "ddc-i2c-bus", 0); @@ -608,19 +652,12 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) of_node_put(ddc); if (!panel->ddc) - return -EPROBE_DEFER; + return ERR_PTR(-EPROBE_DEFER); } - if (desc == &panel_dpi) { - /* Handle the generic panel-dpi binding */ - err = panel_dpi_probe(dev, panel); - if (err) - goto free_ddc; - desc = panel->desc; - } else { - if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) - panel_simple_parse_panel_timing_node(dev, panel, &dt); - } + if (!of_device_is_compatible(dev->of_node, 
"panel-dpi") && + !of_get_display_timing(dev->of_node, "panel-timing", &dt)) + panel_simple_parse_panel_timing_node(dev, panel, &dt); if (desc->connector_type == DRM_MODE_CONNECTOR_LVDS) { /* Optional data-mapping property for overriding bus format */ @@ -703,7 +740,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) drm_panel_add(&panel->base); - return 0; + return panel; disable_pm_runtime: pm_runtime_dont_use_autosuspend(dev); @@ -712,7 +749,7 @@ free_ddc: if (panel->ddc) put_device(&panel->ddc->dev); - return err; + return ERR_PTR(err); } static void panel_simple_shutdown(struct device *dev) @@ -5367,7 +5404,12 @@ static const struct of_device_id platform_of_match[] = { }, { /* Must be the last entry */ .compatible = "panel-dpi", - .data = &panel_dpi, + + /* + * Explicitly NULL, the panel_desc structure will be + * allocated by panel_dpi_probe(). + */ + .data = NULL, }, { /* sentinel */ } @@ -5376,13 +5418,13 @@ MODULE_DEVICE_TABLE(of, platform_of_match); static int panel_simple_platform_probe(struct platform_device *pdev) { - const struct panel_desc *desc; + struct panel_simple *panel; - desc = of_device_get_match_data(&pdev->dev); - if (!desc) - return -ENODEV; + panel = panel_simple_probe(&pdev->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); - return panel_simple_probe(&pdev->dev, desc); + return 0; } static void panel_simple_platform_remove(struct platform_device *pdev) @@ -5412,14 +5454,6 @@ static struct platform_driver panel_simple_platform_driver = { .shutdown = panel_simple_platform_shutdown, }; -struct panel_desc_dsi { - struct panel_desc desc; - - unsigned long flags; - enum mipi_dsi_pixel_format format; - unsigned int lanes; -}; - static const struct drm_display_mode auo_b080uan01_mode = { .clock = 154500, .hdisplay = 1200, @@ -5653,16 +5687,14 @@ MODULE_DEVICE_TABLE(of, dsi_of_match); static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi) { const struct panel_desc_dsi *desc; + struct panel_simple 
*panel; int err; - desc = of_device_get_match_data(&dsi->dev); - if (!desc) - return -ENODEV; - - err = panel_simple_probe(&dsi->dev, &desc->desc); - if (err < 0) - return err; + panel = panel_simple_probe(&dsi->dev); + if (IS_ERR(panel)) + return PTR_ERR(panel); + desc = container_of(panel->desc, struct panel_desc_dsi, desc); dsi->mode_flags = desc->flags; dsi->format = desc->format; dsi->lanes = desc->lanes; diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index dd2006d51c7a..eec43d1a5595 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) { - unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH); + unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH); unsigned int height = ssd130x->height; memset(data_array, 0, columns * height); diff --git a/drivers/gpu/drm/sysfb/vesadrm.c b/drivers/gpu/drm/sysfb/vesadrm.c index 4d62c78e7d1e..f7532db3831f 100644 --- a/drivers/gpu/drm/sysfb/vesadrm.c +++ b/drivers/gpu/drm/sysfb/vesadrm.c @@ -362,14 +362,19 @@ static struct vesadrm_device *vesadrm_device_create(struct drm_driver *drv, if (!__screen_info_vbe_mode_nonvga(si)) { vesa->cmap_write = vesadrm_vga_cmap_write; -#if defined(CONFIG_X86_32) } else { +#if defined(CONFIG_X86_32) phys_addr_t pmi_base = __screen_info_vesapm_info_base(si); - const u16 *pmi_addr = phys_to_virt(pmi_base); - vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; - vesa->cmap_write = vesadrm_pmi_cmap_write; + if (pmi_base) { + const u16 *pmi_addr = phys_to_virt(pmi_base); + + vesa->pmi.PrimaryPalette = (u8 *)pmi_addr + pmi_addr[2]; + vesa->cmap_write = vesadrm_pmi_cmap_write; + } else #endif + if (format->is_color_indexed) + drm_warn(dev, "hardware palette is unchangeable, colors may be incorrect\n"); } 
#ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 2d9a0a3f6c38..7a38664e890e 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -261,10 +261,8 @@ static int nvdec_load_falcon_firmware(struct nvdec *nvdec) if (!client->group) { virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(nvdec->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); if (IS_ERR(virt)) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 15cab9bda17f..bd90404ea609 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -254,6 +254,13 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + dma_resv_unlock(&fbo->base.base._resv); + kfree(fbo); + return ret; + } + if (fbo->base.resource) { ttm_resource_set_bo(fbo->base.resource, &fbo->base); bo->resource = NULL; @@ -262,12 +269,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.bulk_move = NULL; } - ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); - if (ret) { - kfree(fbo); - return ret; - } - ttm_bo_get(bo); fbo->bo = bo; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index b51f0b648a08..411e47702f8a 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -101,6 +101,12 @@ enum v3d_gen { V3D_GEN_71 = 71, }; +enum v3d_irq { + V3D_CORE_IRQ, + V3D_HUB_IRQ, + V3D_MAX_IRQS, +}; + struct v3d_dev { struct drm_device drm; @@ -112,6 +118,8 @@ struct v3d_dev { bool single_irq_line; + int irq[V3D_MAX_IRQS]; + struct v3d_perfmon_info perfmon_info; void __iomem *hub_regs; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 
d7d16da78db3..37bf5eecdd2c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -134,6 +134,8 @@ v3d_reset(struct v3d_dev *v3d) if (false) v3d_idle_axi(v3d, 0); + v3d_irq_disable(v3d); + v3d_idle_gca(v3d); v3d_reset_sms(v3d); v3d_reset_v3d(v3d); diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 2cca5d3a26a2..a515a301e480 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -260,7 +260,7 @@ v3d_hub_irq(int irq, void *arg) int v3d_irq_init(struct v3d_dev *v3d) { - int irq1, ret, core; + int irq, ret, core; INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); @@ -271,17 +271,24 @@ v3d_irq_init(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS(v3d->ver)); - irq1 = platform_get_irq_optional(v3d_to_pdev(v3d), 1); - if (irq1 == -EPROBE_DEFER) - return irq1; - if (irq1 > 0) { - ret = devm_request_irq(v3d->drm.dev, irq1, + irq = platform_get_irq_optional(v3d_to_pdev(v3d), 1); + if (irq == -EPROBE_DEFER) + return irq; + if (irq > 0) { + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d_core0", v3d); if (ret) goto fail; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_HUB_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_HUB_IRQ], v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d); if (ret) @@ -289,8 +296,12 @@ v3d_irq_init(struct v3d_dev *v3d) } else { v3d->single_irq_line = true; - ret = devm_request_irq(v3d->drm.dev, - platform_get_irq(v3d_to_pdev(v3d), 0), + irq = platform_get_irq(v3d_to_pdev(v3d), 0); + if (irq < 0) + return irq; + v3d->irq[V3D_CORE_IRQ] = irq; + + ret = devm_request_irq(v3d->drm.dev, v3d->irq[V3D_CORE_IRQ], v3d_irq, IRQF_SHARED, "v3d", v3d); if (ret) @@ -331,6 +342,12 @@ 
v3d_irq_disable(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + /* Finish any interrupt handler still in flight. */ + for (int i = 0; i < V3D_MAX_IRQS; i++) { + if (v3d->irq[i]) + synchronize_irq(v3d->irq[i]); + } + /* Clear any pending interrupts we might have left. */ for (core = 0; core < v3d->cores; core++) V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS(v3d->ver)); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d07..42df9d3567e7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -199,7 +199,6 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_dev *v3d = job->v3d; struct v3d_file_priv *file = job->file->driver_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; - struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); unsigned long flags; @@ -209,7 +208,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) else preempt_disable(); - v3d_stats_update(local_stats, now); + /* Don't update the local stats if the file context has already closed */ + if (file) + v3d_stats_update(&file->stats[queue], now); + else + drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n"); + v3d_stats_update(global_stats, now); if (IS_ENABLED(CONFIG_LOCKDEP)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 0695a342b1ef..5205552b1970 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -749,7 +749,7 @@ static int vmw_setup_pci_resources(struct vmw_private *dev, dev->fifo_mem = devm_memremap(dev->drm.dev, fifo_start, fifo_size, - MEMREMAP_WB); + MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(dev->fifo_mem)) { drm_err(&dev->drm, diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index fcc2677a4229..99a91355842e 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ 
b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE - tristate "Intel Xe Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + tristate "Intel Xe2 Graphics" + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE @@ -46,7 +47,8 @@ config DRM_XE select AUXILIARY_BUS select HMM_MIRROR help - Experimental driver for Intel Xe series GPUs + Driver for Intel Xe2 series GPUs and later. Experimental support + for Xe series is also available. If "M" is selected, the module will be called xe. diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..9f4ade25787a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe) spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + if (!xe->display.hotplug.dp_wq) + return -ENOMEM; return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = 
dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..55259969480b 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 5394a1373a6b..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -40,6 +40,7 @@ #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PWR_LIM_VAL REG_GENMASK(14, 0) #define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) #define PWR_LIM_TIME REG_GENMASK(23, 17) #define PWR_LIM_TIME_X REG_GENMASK(23, 22) #define PWR_LIM_TIME_Y REG_GENMASK(21, 17) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c 
b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..11e60d687572 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. 
+ */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? 
byte_copied : ret; } static void xe_devcoredump_free(void *data) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c02c4c4e9412..e9f3c1a53db2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,6 +40,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -986,38 +987,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. 
*/ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1005,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1057,15 +1036,52 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(&gt->global_invl_lock); - xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(&gt->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2. + * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. 
+ */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) { + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); + tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d61650d4aa0b..95242a375e54 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -9,7 +9,7 @@ #include <drm/drm_drv.h> #define DRIVER_NAME "xe" -#define DRIVER_DESC "Intel Xe Graphics" +#define DRIVER_DESC "Intel Xe2 Graphics" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..2c799958c1e4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e5d243c9451..6c4cb9576fb6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -118,7 +118,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - 
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 10622ca471a2..6717a636b1d9 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -444,6 +444,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 084cbdeba8ea..e1362e608146 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -138,6 +138,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) int pending_seqno; /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(&gt->uc.guc.ct)) + return; + + /* * CT channel is already disabled at this point. No new TLB requests can * appear. 
*/ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..bbcbb348256f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -34,6 +34,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) @@ -514,6 +516,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -760,7 +765,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1344,7 +1349,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 
82c4ae458dda..582aac106469 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -22,6 +22,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 18c623992035..c0ca61695d76 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,8 +5,11 @@ #include "xe_guc_pc.h" +#include <linux/cleanup.h> #include <linux/delay.h> +#include <linux/jiffies.h> #include <linux/ktime.h> +#include <linux/wait_bit.h> #include <drm/drm_managed.h> #include <drm/drm_print.h> @@ -51,9 +54,12 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 +#define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -141,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + 
usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -553,6 +589,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,26 +618,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) */ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,24 +653,28 @@ out: */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - 
goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,24 +687,28 @@ out: */ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) +{ int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -657,24 +722,14 @@ out: */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; - - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; } - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; + guard(mutex)(&pc->freq_lock); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** @@ -817,6 +872,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc) static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) { + struct xe_tile *tile = gt_to_tile(pc_to_gt(pc)); int ret; lockdep_assert_held(&pc->freq_lock); @@ -843,6 +899,9 @@ static int 
pc_adjust_freq_bounds(struct xe_guc_pc *pc) if (pc_get_min_freq(pc) > pc->rp0_freq) ret = pc_set_min_freq(pc, pc->rp0_freq); + if (XE_WA(tile->primary_gt, 14022085890)) + ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc))); + out: return ret; } @@ -868,30 +927,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } -static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) { - int ret = 0; + struct xe_gt *gt = pc_to_gt(pc); - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. 
*/ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; } + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. + */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + +static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) +{ + int ret; + + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; + + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); + return ret; } @@ -1068,7 +1214,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6d84a52b660a..2ac87ff4a057 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -891,12 +891,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; 
xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1070,7 +1071,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1116,7 +1117,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { @@ -1762,6 +1764,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 74f31639b37f..f008e8049700 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -159,8 +159,8 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att return ret; } -static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, - u32 uval) +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); u32 val0, val1; @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at 
channel, val0, val1, ret); if (attr == PL1_HWMON_ATTR) - val0 = uval; + val0 = (val0 & ~clr) | set; else return -EIO; @@ -339,7 +339,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe if (hwmon->xe->info.has_mbx_power_limits) { drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", PWR_ATTR_TO_STR(attr), channel); - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val); } else { reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); @@ -370,10 +370,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe } if (hwmon->xe->info.has_mbx_power_limits) - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); else - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, - reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -563,14 +562,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); - r = (r & ~PWR_LIM_TIME) | rxy; - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); - } else { + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), PWR_LIM_TIME, rxy); - } mutex_unlock(&hwmon->hwmon_lock); @@ -1138,12 +1134,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) } else { drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); /* Write default limits to read from pcode from now on. 
*/ - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_CARD, - hwmon->pl1_on_boot[CHANNEL_CARD]); - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_PKG, - hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); hwmon->scl_shift_power = PWR_UNIT; hwmon->scl_shift_energy = ENERGY_UNIT; hwmon->scl_shift_time = TIME_UNIT; diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..023ed6a6b49d 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size); pt->level = level; pt->bo = bo; @@ -91,6 +94,9 @@ out: static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 
bf7c3981897d..6e7b70532d11 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -40,6 +40,7 @@ #define LRC_PPHWSP_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K +#define LRC_WA_BB_SIZE SZ_4K static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -910,7 +911,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc) { xe_hw_fence_ctx_finish(&lrc->fence_ctx); xe_bo_unpin_map_no_vm(lrc->bo); - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +static size_t wa_bb_offset(struct xe_lrc *lrc) +{ + return lrc->bo->size - LRC_WA_BB_SIZE; } /* @@ -943,15 +948,16 @@ static void xe_lrc_finish(struct xe_lrc *lrc) #define CONTEXT_ACTIVE 1ULL static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { + const size_t max_size = LRC_WA_BB_SIZE; u32 *cmd, *buf = NULL; - if (lrc->bb_per_ctx_bo->vmap.is_iomem) { - buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (lrc->bo->vmap.is_iomem) { + buf = kmalloc(max_size, GFP_KERNEL); if (!buf) return -ENOMEM; cmd = buf; } else { - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; @@ -974,13 +980,14 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, - buf, (cmd - buf) * sizeof(*cmd)); + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, + wa_bb_offset(lrc), buf, + (cmd - buf) * sizeof(*cmd)); kfree(buf); } - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + + wa_bb_offset(lrc) + 1); return 0; } @@ -1018,20 +1025,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, + lrc_size + LRC_WA_BB_SIZE, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, - ttm_bo_type_kernel, - bo_flags); - if (IS_ERR(lrc->bb_per_ctx_bo)) { - err = PTR_ERR(lrc->bb_per_ctx_bo); - goto err_lrc_finish; - } - lrc->size = lrc_size; lrc->ring.size = ring_size; lrc->ring.tail = 0; @@ -1819,7 +1819,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - + LRC_WA_BB_SIZE; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index ae24cf6f8dd9..883e550a9423 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -53,9 +53,6 @@ struct xe_lrc { /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ u64 ctx_timestamp; - - /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ - struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..66bc02302c55 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. 
@@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e4742e27e2cd..da6793c2f991 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -20,7 +20,7 @@ struct xe_modparam xe_modparam = { .probe_display = true, - 
.guc_log_level = 3, + .guc_log_level = IS_ENABLED(CONFIG_DRM_XE_DEBUG) ? 3 : 1, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, .svm_notifier_size = 512, diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ac4beaed58ff..278af53c74dc 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -140,7 +140,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..ad263de44111 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe) err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); @@ -753,11 +752,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. 
+ * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2741849bbf4d..a6612105201a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -114,10 +114,10 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 9efc5accd43d..6d70109fcc43 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -21,7 +21,8 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) -14018094691 GRAPHICS_VERSION(2004) +14018094691 GRAPHICS_VERSION_RANGE(2001, 2002) + GRAPHICS_VERSION(2004) 14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) 18024947630 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) @@ -37,10 +38,10 @@ GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display 
PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) @@ -59,3 +60,7 @@ no_media_l3 MEDIA_VERSION(3000) MEDIA_VERSION_RANGE(1301, 3000) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + +# SoC workaround - currently applies to all platforms with the following +# primary GT GMDID +14022085890 GRAPHICS_VERSION(2001) diff --git a/drivers/hid/hid-appletb-kbd.c b/drivers/hid/hid-appletb-kbd.c index 6f251b284018..b00687e67ce8 100644 --- a/drivers/hid/hid-appletb-kbd.c +++ b/drivers/hid/hid-appletb-kbd.c @@ -430,14 +430,20 @@ static int appletb_kbd_probe(struct hid_device *hdev, const struct hid_device_id ret = appletb_kbd_set_mode(kbd, appletb_tb_def_mode); if (ret) { dev_err_probe(dev, ret, "Failed to set touchbar mode\n"); - goto close_hw; + goto unregister_handler; } hid_set_drvdata(hdev, kbd); return 0; +unregister_handler: + input_unregister_handler(&kbd->inp_handler); close_hw: + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); stop_hw: hid_hw_stop(hdev); @@ -451,7 +457,10 @@ static void appletb_kbd_remove(struct hid_device *hdev) appletb_kbd_set_mode(kbd, APPLETB_KBD_MODE_OFF); input_unregister_handler(&kbd->inp_handler); - timer_delete_sync(&kbd->inactivity_timer); + if (kbd->backlight_dev) { + put_device(&kbd->backlight_dev->dev); + timer_delete_sync(&kbd->inactivity_timer); + } hid_hw_close(hdev); hid_hw_stop(hdev); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 8433306148d5..c6b6b1029540 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -3298,8 +3298,8 @@ static const char *keys[KEY_MAX + 1] = { [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", [BTN_TOOL_TRIPLETAP] = 
"ToolTripleTap", [BTN_TOOL_QUADTAP] = "ToolQuadrupleTap", - [BTN_GEAR_DOWN] = "WheelBtn", - [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", + [BTN_GEAR_DOWN] = "BtnGearDown", [BTN_GEAR_UP] = "BtnGearUp", + [BTN_WHEEL] = "BtnWheel", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", [KEY_OPTION] = "Option", [KEY_INFO] = "Info", diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index defcf91fdd14..0ad7d25d9864 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -89,7 +89,8 @@ static const __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, break; case USB_DEVICE_ID_ELECOM_M_DT1URBK: case USB_DEVICE_ID_ELECOM_M_DT1DRBK: - case USB_DEVICE_ID_ELECOM_M_HT1URBK: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_010C: + case USB_DEVICE_ID_ELECOM_M_HT1URBK_019B: case USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D: /* * Report descriptor format: @@ -122,7 +123,8 @@ static const struct hid_device_id elecom_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, { } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e3fb4e2fe911..33cc5820f2be 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -312,6 +312,8 @@ #define USB_DEVICE_ID_ASUS_AK1D 0x1125 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408 #define USB_DEVICE_ID_CHICONY_ACER_SWITCH12 0x1421 +#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA 0xb824 
+#define USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2 0xb82c #define USB_VENDOR_ID_CHUNGHWAT 0x2247 #define USB_DEVICE_ID_CHUNGHWAT_MULTITOUCH 0x0001 @@ -446,7 +448,8 @@ #define USB_DEVICE_ID_ELECOM_M_XT4DRBK 0x00fd #define USB_DEVICE_ID_ELECOM_M_DT1URBK 0x00fe #define USB_DEVICE_ID_ELECOM_M_DT1DRBK 0x00ff -#define USB_DEVICE_ID_ELECOM_M_HT1URBK 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_010C 0x010c +#define USB_DEVICE_ID_ELECOM_M_HT1URBK_019B 0x019b #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D 0x010d #define USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C 0x011c @@ -819,6 +822,7 @@ #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 +#define USB_DEVICE_ID_LENOVO_X1_TAB2 0x60a4 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 #define USB_DEVICE_ID_LENOVO_X12_TAB 0x60fe #define USB_DEVICE_ID_LENOVO_X12_TAB2 0x61ae @@ -1525,4 +1529,7 @@ #define USB_VENDOR_ID_SIGNOTEC 0x2133 #define USB_DEVICE_ID_SIGNOTEC_VIEWSONIC_PD1011 0x0018 +#define USB_VENDOR_ID_SMARTLINKTECHNOLOGY 0x4c4a +#define USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155 0x4155 + #endif diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 9d80635a91eb..ff1784b5c2a4 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -2343,7 +2343,7 @@ int hidinput_connect(struct hid_device *hid, unsigned int force) } if (list_empty(&hid->inputs)) { - hid_err(hid, "No inputs registered, leaving\n"); + hid_dbg(hid, "No inputs registered, leaving\n"); goto out_unwind; } diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index af29ba840522..b3121fa7a72d 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -492,6 +492,7 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB: case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_input_mapping_x1_tab_kbd(hdev, hi, field, 
usage, bit, max); default: @@ -548,11 +549,14 @@ static void lenovo_features_set_cptkbd(struct hid_device *hdev) /* * Tell the keyboard a driver understands it, and turn F7, F9, F11 into - * regular keys + * regular keys (Compact only) */ - ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); - if (ret) - hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + if (hdev->product == USB_DEVICE_ID_LENOVO_CUSBKBD || + hdev->product == USB_DEVICE_ID_LENOVO_CBTKBD) { + ret = lenovo_send_cmd_cptkbd(hdev, 0x01, 0x03); + if (ret) + hid_warn(hdev, "Failed to switch F7/9/11 mode: %d\n", ret); + } /* Switch middle button to native mode */ ret = lenovo_send_cmd_cptkbd(hdev, 0x09, 0x01); @@ -605,6 +609,7 @@ static ssize_t attr_fn_lock_store(struct device *dev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, TP10UBKBD_FN_LOCK_LED, value); if (ret) @@ -861,6 +866,7 @@ static int lenovo_event(struct hid_device *hdev, struct hid_field *field, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: return lenovo_event_tp10ubkbd(hdev, field, usage, value); default: @@ -1144,6 +1150,7 @@ static int lenovo_led_brightness_set(struct led_classdev *led_cdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_led_set_tp10ubkbd(hdev, tp10ubkbd_led[led_nr], value); break; @@ -1384,6 +1391,7 @@ static int lenovo_probe(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: ret = lenovo_probe_tp10ubkbd(hdev); break; @@ 
-1473,6 +1481,7 @@ static void lenovo_remove(struct hid_device *hdev) case USB_DEVICE_ID_LENOVO_X12_TAB2: case USB_DEVICE_ID_LENOVO_TP10UBKBD: case USB_DEVICE_ID_LENOVO_X1_TAB: + case USB_DEVICE_ID_LENOVO_X1_TAB2: case USB_DEVICE_ID_LENOVO_X1_TAB3: lenovo_remove_tp10ubkbd(hdev); break; @@ -1524,6 +1533,8 @@ static const struct hid_device_id lenovo_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB2) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB3) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X12_TAB) }, diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index b41001e02da7..a1c54ffe02b4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2132,12 +2132,18 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) }, - /* Lenovo X1 TAB Gen 2 */ + /* Lenovo X1 TAB Gen 1 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_TAB) }, + /* Lenovo X1 TAB Gen 2 */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_LENOVO, + USB_DEVICE_ID_LENOVO_X1_TAB2) }, + /* Lenovo X1 TAB Gen 3 */ { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 839d5bcd72b1..fb4985988615 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -308,6 +308,7 @@ enum joycon_ctlr_state { JOYCON_CTLR_STATE_INIT, JOYCON_CTLR_STATE_READ, JOYCON_CTLR_STATE_REMOVED, + JOYCON_CTLR_STATE_SUSPENDED, }; /* Controller type received as part of device info */ @@ 
-2750,14 +2751,46 @@ static void nintendo_hid_remove(struct hid_device *hdev) static int nintendo_hid_resume(struct hid_device *hdev) { - int ret = joycon_init(hdev); + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + int ret; + + hid_dbg(hdev, "resume\n"); + if (!joycon_using_usb(ctlr)) { + hid_dbg(hdev, "no-op resume for bt ctlr\n"); + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + return 0; + } + ret = joycon_init(hdev); if (ret) - hid_err(hdev, "Failed to restore controller after resume"); + hid_err(hdev, + "Failed to restore controller after resume: %d\n", + ret); + else + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; return ret; } +static int nintendo_hid_suspend(struct hid_device *hdev, pm_message_t message) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + hid_dbg(hdev, "suspend: %d\n", message.event); + /* + * Avoid any blocking loops in suspend/resume transitions. + * + * joycon_enforce_subcmd_rate() can result in repeated retries if for + * whatever reason the controller stops providing input reports. + * + * This has been observed with bluetooth controllers which lose + * connectivity prior to suspend (but not long enough to result in + * complete disconnection). 
+ */ + ctlr->ctlr_state = JOYCON_CTLR_STATE_SUSPENDED; + return 0; +} + #endif static const struct hid_device_id nintendo_hid_devices[] = { @@ -2796,6 +2829,7 @@ static struct hid_driver nintendo_hid_driver = { #ifdef CONFIG_PM .resume = nintendo_hid_resume, + .suspend = nintendo_hid_suspend, #endif }; static int __init nintendo_init(void) diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 7fefeb413ec3..9bf9ce8dc803 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -410,7 +410,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_XT4DRBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1URBK) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_DT1DRBK) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_010C) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1URBK_019B) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_010D) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_M_HT1DRBK_011C) }, #endif @@ -757,6 +758,8 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) }, { HID_USB_DEVICE(USB_VENDOR_ID_AXENTIA, USB_DEVICE_ID_AXENTIA_FM_RADIO) }, { HID_USB_DEVICE(USB_VENDOR_ID_BERKSHIRE, USB_DEVICE_ID_BERKSHIRE_PCWD) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_HP_5MP_CAMERA2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CIDC, 0x0103) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI470X) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_RADIO_SI4713) }, @@ -904,6 +907,7 @@ static const struct hid_device_id hid_ignore_list[] = { #endif { 
HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SMARTLINKTECHNOLOGY, USB_DEVICE_ID_SMARTLINKTECHNOLOGY_4155) }, { } }; diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index 07e90d51f073..fa5d68c36313 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -38,6 +38,7 @@ #define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845 #define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 #define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 +#define PCI_DEVICE_ID_INTEL_ISH_WCL 0x4D45 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index ff0fc8010072..c57483224db6 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -27,10 +27,12 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_NONE, ISHTP_DRIVER_DATA_LNL_M, ISHTP_DRIVER_DATA_PTL, + ISHTP_DRIVER_DATA_WCL, }; #define ISH_FW_GEN_LNL_M "lnlm" #define ISH_FW_GEN_PTL "ptl" +#define ISH_FW_GEN_WCL "wcl" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -42,6 +44,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_PTL] = { .fw_generation = ISH_FW_GEN_PTL, }, + [ISHTP_DRIVER_DATA_WCL] = { + .fw_generation = ISH_FW_GEN_WCL, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -67,9 +72,10 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_MTL_P)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = 
ISHTP_DRIVER_DATA_PTL}, - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL}, + {PCI_DEVICE_DATA(INTEL, ISH_LNL_M, ISHTP_DRIVER_DATA_LNL_M)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_H, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_PTL_P, ISHTP_DRIVER_DATA_PTL)}, + {PCI_DEVICE_DATA(INTEL, ISH_WCL, ISHTP_DRIVER_DATA_WCL)}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c index f493df0d5dc4..a63f8c833252 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/quicki2c-protocol.c @@ -4,6 +4,7 @@ #include <linux/bitfield.h> #include <linux/hid.h> #include <linux/hid-over-i2c.h> +#include <linux/unaligned.h> #include "intel-thc-dev.h" #include "intel-thc-dma.h" @@ -200,6 +201,9 @@ int quicki2c_set_report(struct quicki2c_device *qcdev, u8 report_type, int quicki2c_reset(struct quicki2c_device *qcdev) { + u16 input_reg = le16_to_cpu(qcdev->dev_desc.input_reg); + size_t read_len = HIDI2C_LENGTH_LEN; + u32 prd_len = read_len; int ret; qcdev->reset_ack = false; @@ -213,12 +217,32 @@ int quicki2c_reset(struct quicki2c_device *qcdev) ret = wait_event_interruptible_timeout(qcdev->reset_ack_wq, qcdev->reset_ack, HIDI2C_RESET_TIMEOUT * HZ); - if (ret <= 0 || !qcdev->reset_ack) { + if (qcdev->reset_ack) + return 0; + + /* + * Manually read reset response if it wasn't received, in case reset interrupt + * was missed by touch device or THC hardware. + */ + ret = thc_tic_pio_read(qcdev->thc_hw, input_reg, read_len, &prd_len, + (u32 *)qcdev->input_buf); + if (ret) { + dev_err_once(qcdev->dev, "Read Reset Response failed, ret %d\n", ret); + return ret; + } + + /* + * Check response packet length, it's first 16 bits of packet. + * If response packet length is zero, it's reset response, otherwise not. 
+ */ + if (get_unaligned_le16(qcdev->input_buf)) { dev_err_once(qcdev->dev, "Wait reset response timed out ret:%d timeout:%ds\n", ret, HIDI2C_RESET_TIMEOUT); return -ETIMEDOUT; } + qcdev->reset_ack = true; + return 0; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index eaf099b2efdb..9a57504e51a1 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2048,14 +2048,18 @@ static int wacom_initialize_remotes(struct wacom *wacom) remote->remote_dir = kobject_create_and_add("wacom_remote", &wacom->hdev->dev.kobj); - if (!remote->remote_dir) + if (!remote->remote_dir) { + kfifo_free(&remote->remote_fifo); return -ENOMEM; + } error = sysfs_create_files(remote->remote_dir, remote_unpair_attrs); if (error) { hid_err(wacom->hdev, "cannot create sysfs group err: %d\n", error); + kfifo_free(&remote->remote_fifo); + kobject_put(remote->remote_dir); return error; } @@ -2901,6 +2905,7 @@ static void wacom_remove(struct hid_device *hdev) hid_hw_stop(hdev); cancel_delayed_work_sync(&wacom->init_work); + cancel_delayed_work_sync(&wacom->aes_battery_work); cancel_work_sync(&wacom->wireless_work); cancel_work_sync(&wacom->battery_work); cancel_work_sync(&wacom->remote_work); diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c index a3a07662e491..8aeec16a7a90 100644 --- a/drivers/hwmon/ftsteutates.c +++ b/drivers/hwmon/ftsteutates.c @@ -423,13 +423,16 @@ static int fts_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; case hwmon_pwm: switch (attr) { - case hwmon_pwm_auto_channels_temp: - if (data->fan_source[channel] == FTS_FAN_SOURCE_INVALID) + case hwmon_pwm_auto_channels_temp: { + u8 fan_source = data->fan_source[channel]; + + if (fan_source == FTS_FAN_SOURCE_INVALID || fan_source >= BITS_PER_LONG) *val = 0; else - *val = BIT(data->fan_source[channel]); + *val = BIT(fan_source); return 0; + } default: break; } diff --git a/drivers/hwmon/ltc4282.c b/drivers/hwmon/ltc4282.c index 7f38d2696239..f607fe8f7937 
100644 --- a/drivers/hwmon/ltc4282.c +++ b/drivers/hwmon/ltc4282.c @@ -1512,13 +1512,6 @@ static int ltc4282_setup(struct ltc4282_state *st, struct device *dev) } if (device_property_read_bool(dev, "adi,fault-log-enable")) { - ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, - LTC4282_FAULT_LOG_EN_MASK); - if (ret) - return ret; - } - - if (device_property_read_bool(dev, "adi,fault-log-enable")) { ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, LTC4282_FAULT_LOG_EN_MASK); if (ret) return ret; diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 9486db249c64..b3694a4209b9 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -459,12 +459,10 @@ static ssize_t occ_show_power_1(struct device *dev, return sysfs_emit(buf, "%llu\n", val); } -static u64 occ_get_powr_avg(u64 *accum, u32 *samples) +static u64 occ_get_powr_avg(u64 accum, u32 samples) { - u64 divisor = get_unaligned_be32(samples); - - return (divisor == 0) ? 0 : - div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor); + return (samples == 0) ? 
0 : + mul_u64_u32_div(accum, 1000000UL, samples); } static ssize_t occ_show_power_2(struct device *dev, @@ -489,8 +487,8 @@ static ssize_t occ_show_power_2(struct device *dev, get_unaligned_be32(&power->sensor_id), power->function_id, power->apss_channel); case 1: - val = occ_get_powr_avg(&power->accumulator, - &power->update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->accumulator), + get_unaligned_be32(&power->update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->update_tag) * @@ -527,8 +525,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_system\n", get_unaligned_be32(&power->sensor_id)); case 1: - val = occ_get_powr_avg(&power->system.accumulator, - &power->system.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->system.accumulator), + get_unaligned_be32(&power->system.update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->system.update_tag) * @@ -541,8 +539,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_proc\n", get_unaligned_be32(&power->sensor_id)); case 5: - val = occ_get_powr_avg(&power->proc.accumulator, - &power->proc.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->proc.accumulator), + get_unaligned_be32(&power->proc.update_tag)); break; case 6: val = (u64)get_unaligned_be32(&power->proc.update_tag) * @@ -555,8 +553,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdd\n", get_unaligned_be32(&power->sensor_id)); case 9: - val = occ_get_powr_avg(&power->vdd.accumulator, - &power->vdd.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdd.accumulator), + get_unaligned_be32(&power->vdd.update_tag)); break; case 10: val = (u64)get_unaligned_be32(&power->vdd.update_tag) * @@ -569,8 +567,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdn\n", get_unaligned_be32(&power->sensor_id)); case 13: - val = 
occ_get_powr_avg(&power->vdn.accumulator, - &power->vdn.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdn.accumulator), + get_unaligned_be32(&power->vdn.update_tag)); break; case 14: val = (u64)get_unaligned_be32(&power->vdn.update_tag) * @@ -747,29 +745,30 @@ static ssize_t occ_show_extended(struct device *dev, } /* - * Some helper macros to make it easier to define an occ_attribute. Since these - * are dynamically allocated, we shouldn't use the existing kernel macros which + * A helper to make it easier to define an occ_attribute. Since these + * are dynamically allocated, we cannot use the existing kernel macros which * stringify the name argument. */ -#define ATTR_OCC(_name, _mode, _show, _store) { \ - .attr = { \ - .name = _name, \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ - }, \ - .show = _show, \ - .store = _store, \ -} - -#define SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index) { \ - .dev_attr = ATTR_OCC(_name, _mode, _show, _store), \ - .index = _index, \ - .nr = _nr, \ +static void occ_init_attribute(struct occ_attribute *attr, int mode, + ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf), + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count), + int nr, int index, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(attr->name, sizeof(attr->name), fmt, args); + va_end(args); + + attr->sensor.dev_attr.attr.name = attr->name; + attr->sensor.dev_attr.attr.mode = mode; + attr->sensor.dev_attr.show = show; + attr->sensor.dev_attr.store = store; + attr->sensor.index = index; + attr->sensor.nr = nr; } -#define OCC_INIT_ATTR(_name, _mode, _show, _store, _nr, _index) \ - ((struct sensor_device_attribute_2) \ - SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index)) - /* * Allocate and instatiate sensor_device_attribute_2s. It's most efficient to * use our own instead of the built-in hwmon attribute types. 
@@ -855,14 +854,15 @@ static int occ_setup_sensor_attrs(struct occ *occ) sensors->extended.num_sensors = 0; } - occ->attrs = devm_kzalloc(dev, sizeof(*occ->attrs) * num_attrs, + occ->attrs = devm_kcalloc(dev, num_attrs, sizeof(*occ->attrs), GFP_KERNEL); if (!occ->attrs) return -ENOMEM; /* null-terminated list */ - occ->group.attrs = devm_kzalloc(dev, sizeof(*occ->group.attrs) * - num_attrs + 1, GFP_KERNEL); + occ->group.attrs = devm_kcalloc(dev, num_attrs + 1, + sizeof(*occ->group.attrs), + GFP_KERNEL); if (!occ->group.attrs) return -ENOMEM; @@ -872,43 +872,33 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = i + 1; temp = ((struct temp_sensor_2 *)sensors->temp.data) + i; - snprintf(attr->name, sizeof(attr->name), "temp%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 0, i, "temp%d_label", s); attr++; if (sensors->temp.version == 2 && temp->fru_type == OCC_FRU_TYPE_VRM) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_alarm", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_alarm", s); } else { - snprintf(attr->name, sizeof(attr->name), - "temp%d_input", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_input", s); } - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 1, i); attr++; if (sensors->temp.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_fru_type", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 2, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 2, i, "temp%d_fru_type", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "temp%d_fault", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 3, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 3, i, "temp%d_fault", s); attr++; if (sensors->temp.version == 0x10) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 
0444, - show_temp, NULL, - 4, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 4, i, "temp%d_max", s); attr++; } } @@ -917,14 +907,12 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->freq.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "freq%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 0, i, "freq%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "freq%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 1, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 1, i, "freq%d_input", s); attr++; } @@ -940,32 +928,24 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = (i * 4) + 1; for (j = 0; j < 4; ++j) { - snprintf(attr->name, sizeof(attr->name), - "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_input", s); attr++; s++; @@ -977,28 +957,20 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->power.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), - 
"power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 0, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 0, i, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 1, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 1, i, "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 2, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 2, i, "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 3, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 3, i, "power%d_input", s); attr++; } @@ -1006,56 +978,43 @@ static int occ_setup_sensor_attrs(struct occ *occ) } if (sensors->caps.num_sensors >= 1) { - snprintf(attr->name, sizeof(attr->name), "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 0, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 0, 0, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 1, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 1, 0, "power%d_cap", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 2, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 2, 0, "power%d_input", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_not_redundant", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 3, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 3, 0, "power%d_cap_not_redundant", s); attr++; - snprintf(attr->name, sizeof(attr->name), 
"power%d_cap_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 4, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 4, 0, "power%d_cap_max", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_min", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 5, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 5, 0, "power%d_cap_min", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_user", - s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0644, show_caps, - occ_store_caps_user, 6, 0); + occ_init_attribute(attr, 0644, show_caps, occ_store_caps_user, + 6, 0, "power%d_cap_user", s); attr++; if (sensors->caps.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_user_source", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, 7, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 7, 0, "power%d_cap_user_source", s); attr++; if (sensors->caps.version > 2) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_min_soft", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, - 8, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 8, 0, + "power%d_cap_min_soft", s); attr++; } } @@ -1064,19 +1023,16 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->extended.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "extn%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 0, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 0, i, "extn%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_flags", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 1, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 1, i, "extn%d_flags", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, 
NULL, 2, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 2, i, "extn%d_input", s); attr++; } diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index eddf25b90ca8..6544d27e4419 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -619,8 +619,8 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ const struct i2c_algorithm i2c_bit_algo = { - .master_xfer = bit_xfer, - .master_xfer_atomic = bit_xfer_atomic, + .xfer = bit_xfer, + .xfer_atomic = bit_xfer_atomic, .functionality = bit_func, }; EXPORT_SYMBOL(i2c_bit_algo); diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 384af88e58ad..74b66aec33d4 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -361,8 +361,8 @@ static u32 pca_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pca_algo = { - .master_xfer = pca_xfer, - .functionality = pca_func, + .xfer = pca_xfer, + .functionality = pca_func, }; static unsigned int pca_probe_chip(struct i2c_adapter *adap) diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 740066ceaea3..fd563e845d4b 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -389,8 +389,8 @@ static u32 pcf_func(struct i2c_adapter *adap) /* exported algorithm data: */ static const struct i2c_algorithm pcf_algo = { - .master_xfer = pcf_xfer, - .functionality = pcf_func, + .xfer = pcf_xfer, + .functionality = pcf_func, }; /* diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 48c5ab832009..c8d115b58e44 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -200,7 +200,7 @@ config I2C_ISMT config I2C_PIIX4 tristate "Intel PIIX4 and compatible (ATI/AMD/Serverworks/Broadcom/SMSC)" - depends on PCI && HAS_IOPORT && X86 + depends on PCI && HAS_IOPORT select I2C_SMBUS 
help If you say yes to this option, support will be included for the Intel @@ -1530,7 +1530,7 @@ config I2C_XGENE_SLIMPRO config SCx200_ACB tristate "Geode ACCESS.bus support" - depends on X86_32 && PCI + depends on X86_32 && PCI && HAS_IOPORT help Enable the use of the ACCESS.bus controllers on the Geode SCx200 and SC1100 processors and the CS5535 and CS5536 Geode companion devices. diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index d9dd0e475d1a..188e24cc4d35 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -179,7 +179,7 @@ static u32 i2c_amd_func(struct i2c_adapter *a) } static const struct i2c_algorithm i2c_amd_algorithm = { - .master_xfer = i2c_amd_xfer, + .xfer = i2c_amd_xfer, .functionality = i2c_amd_func, }; diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 1550d3d552ae..a26b74c71206 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -814,11 +814,11 @@ static int aspeed_i2c_unreg_slave(struct i2c_client *client) #endif /* CONFIG_I2C_SLAVE */ static const struct i2c_algorithm aspeed_i2c_algo = { - .master_xfer = aspeed_i2c_master_xfer, - .functionality = aspeed_i2c_functionality, + .xfer = aspeed_i2c_master_xfer, + .functionality = aspeed_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = aspeed_i2c_reg_slave, - .unreg_slave = aspeed_i2c_unreg_slave, + .reg_slave = aspeed_i2c_reg_slave, + .unreg_slave = aspeed_i2c_unreg_slave, #endif /* CONFIG_I2C_SLAVE */ }; diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 374fc50bb205..59795c1c24ff 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -739,8 +739,8 @@ static u32 at91_twi_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm at91_twi_algorithm = { - .master_xfer = at91_twi_xfer, - .functionality = at91_twi_func, + .xfer = 
at91_twi_xfer, + .functionality = at91_twi_func, }; static int at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 50030256cd85..0555eeb6903a 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -706,7 +706,7 @@ static int axxia_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm axxia_i2c_algo = { - .master_xfer = axxia_i2c_xfer, + .xfer = axxia_i2c_xfer, .functionality = axxia_i2c_func, .reg_slave = axxia_i2c_reg_slave, .unreg_slave = axxia_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 63bc3c8f49d3..e418a4f23f15 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1041,7 +1041,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) } static struct i2c_algorithm bcm_iproc_algo = { - .master_xfer = bcm_iproc_i2c_xfer, + .xfer = bcm_iproc_i2c_xfer, .functionality = bcm_iproc_i2c_functionality, .reg_slave = bcm_iproc_i2c_reg_slave, .unreg_slave = bcm_iproc_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 8df63aaf2a80..697d095afbe4 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1231,12 +1231,12 @@ static int cdns_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm cdns_i2c_algo = { - .master_xfer = cdns_i2c_master_xfer, - .master_xfer_atomic = cdns_i2c_master_xfer_atomic, - .functionality = cdns_i2c_func, + .xfer = cdns_i2c_master_xfer, + .xfer_atomic = cdns_i2c_master_xfer_atomic, + .functionality = cdns_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = cdns_reg_slave, - .unreg_slave = cdns_unreg_slave, + .reg_slave = cdns_reg_slave, + .unreg_slave = cdns_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-cgbc.c b/drivers/i2c/busses/i2c-cgbc.c index 
f054d167ac47..25a74fa51aa0 100644 --- a/drivers/i2c/busses/i2c-cgbc.c +++ b/drivers/i2c/busses/i2c-cgbc.c @@ -331,8 +331,8 @@ static u32 cgbc_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cgbc_i2c_algorithm = { - .master_xfer = cgbc_i2c_xfer, - .functionality = cgbc_i2c_func, + .xfer = cgbc_i2c_xfer, + .functionality = cgbc_i2c_func, }; static struct i2c_algo_cgbc_data cgbc_i2c_algo_data[] = { diff --git a/drivers/i2c/busses/i2c-designware-amdisp.c b/drivers/i2c/busses/i2c-designware-amdisp.c index ad6f08338124..450793d5f839 100644 --- a/drivers/i2c/busses/i2c-designware-amdisp.c +++ b/drivers/i2c/busses/i2c-designware-amdisp.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/soc/amd/isp4_misc.h> #include "i2c-designware-core.h" @@ -62,6 +63,7 @@ static int amd_isp_dw_i2c_plat_probe(struct platform_device *pdev) adap = &isp_i2c_dev->adapter; adap->owner = THIS_MODULE; + scnprintf(adap->name, sizeof(adap->name), AMDISP_I2C_ADAP_NAME); ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; /* use dynamically allocated adapter id */ diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index c5394229b77f..cbd88ffa5610 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -363,6 +363,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs, dev->msgs = msgs; dev->msgs_num = num_msgs; + dev->msg_write_idx = 0; i2c_dw_xfer_init(dev); /* Initiate messages read/write transaction */ @@ -1042,8 +1043,9 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) if (ret) return ret; - snprintf(adap->name, sizeof(adap->name), - "Synopsys DesignWare I2C adapter"); + if (!adap->name[0]) + scnprintf(adap->name, sizeof(adap->name), + "Synopsys DesignWare I2C adapter"); adap->retries = 3; adap->algo = &i2c_dw_algo; 
adap->quirks = &i2c_dw_quirks; diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index efdaddf99f9e..27ea3c130a16 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -690,7 +690,7 @@ static u32 pch_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pch_algorithm = { - .master_xfer = pch_i2c_xfer, + .xfer = pch_i2c_xfer, .functionality = pch_i2c_func }; diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 2512cef8e2a2..ece019b3d066 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -351,10 +351,10 @@ static int em_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm em_i2c_algo = { - .master_xfer = em_i2c_xfer, + .xfer = em_i2c_xfer, .functionality = em_i2c_func, - .reg_slave = em_i2c_reg_slave, - .unreg_slave = em_i2c_unreg_slave, + .reg_slave = em_i2c_reg_slave, + .unreg_slave = em_i2c_unreg_slave, }; static int em_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 02f24479aa07..9c1c5f3c09f6 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -879,9 +879,9 @@ static u32 exynos5_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm exynos5_i2c_algorithm = { - .master_xfer = exynos5_i2c_xfer, - .master_xfer_atomic = exynos5_i2c_xfer_atomic, - .functionality = exynos5_i2c_func, + .xfer = exynos5_i2c_xfer, + .xfer_atomic = exynos5_i2c_xfer_atomic, + .functionality = exynos5_i2c_func, }; static int exynos5_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-gxp.c b/drivers/i2c/busses/i2c-gxp.c index 0fc39caa6c87..2d117e7e3cb6 100644 --- a/drivers/i2c/busses/i2c-gxp.c +++ b/drivers/i2c/busses/i2c-gxp.c @@ -184,11 +184,11 @@ static int gxp_i2c_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm gxp_i2c_algo = { - .master_xfer = 
gxp_i2c_master_xfer, + .xfer = gxp_i2c_master_xfer, .functionality = gxp_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = gxp_i2c_reg_slave, - .unreg_slave = gxp_i2c_unreg_slave, + .reg_slave = gxp_i2c_reg_slave, + .unreg_slave = gxp_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 3278707bb885..a454f9f25146 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1143,7 +1143,7 @@ static u32 img_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm img_i2c_algo = { - .master_xfer = img_i2c_xfer, + .xfer = img_i2c_xfer, .functionality = img_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 342d47e67586..064bc83840a6 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1268,10 +1268,10 @@ static u32 lpi2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm lpi2c_imx_algo = { - .master_xfer = lpi2c_imx_xfer, - .functionality = lpi2c_imx_func, - .reg_target = lpi2c_imx_register_target, - .unreg_target = lpi2c_imx_unregister_target, + .xfer = lpi2c_imx_xfer, + .functionality = lpi2c_imx_func, + .reg_target = lpi2c_imx_register_target, + .unreg_target = lpi2c_imx_unregister_target, }; static const struct of_device_id lpi2c_imx_of_match[] = { diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index de01dfecb16e..205cc132fdec 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1008,7 +1008,7 @@ static inline int i2c_imx_isr_read(struct imx_i2c_struct *i2c_imx) /* setup bus to read data */ temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR); temp &= ~I2CR_MTX; - if (i2c_imx->msg->len - 1) + if ((i2c_imx->msg->len - 1) || (i2c_imx->msg->flags & I2C_M_RECV_LEN)) temp &= ~I2CR_TXAK; imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR); @@ -1063,6 +1063,7 @@ static inline void 
i2c_imx_isr_read_block_data_len(struct imx_i2c_struct *i2c_im wake_up(&i2c_imx->queue); } i2c_imx->msg->len += len; + i2c_imx->msg->buf[i2c_imx->msg_buf_idx++] = len; } static irqreturn_t i2c_imx_master_isr(struct imx_i2c_struct *i2c_imx, unsigned int status) @@ -1692,11 +1693,11 @@ static u32 i2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm i2c_imx_algo = { - .master_xfer = i2c_imx_xfer, - .master_xfer_atomic = i2c_imx_xfer_atomic, + .xfer = i2c_imx_xfer, + .xfer_atomic = i2c_imx_xfer_atomic, .functionality = i2c_imx_func, - .reg_slave = i2c_imx_reg_slave, - .unreg_slave = i2c_imx_unreg_slave, + .reg_slave = i2c_imx_reg_slave, + .unreg_slave = i2c_imx_unreg_slave, }; static int i2c_imx_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c index 5965b4cf6220..b68a21fff0b5 100644 --- a/drivers/i2c/busses/i2c-k1.c +++ b/drivers/i2c/busses/i2c-k1.c @@ -477,7 +477,7 @@ static int spacemit_i2c_xfer(struct i2c_adapter *adapt, struct i2c_msg *msgs, in ret = spacemit_i2c_wait_bus_idle(i2c); if (!ret) - spacemit_i2c_xfer_msg(i2c); + ret = spacemit_i2c_xfer_msg(i2c); else if (ret < 0) dev_dbg(i2c->dev, "i2c transfer error: %d\n", ret); else diff --git a/drivers/i2c/busses/i2c-keba.c b/drivers/i2c/busses/i2c-keba.c index 7b9ed2592f5b..9420c8b342b5 100644 --- a/drivers/i2c/busses/i2c-keba.c +++ b/drivers/i2c/busses/i2c-keba.c @@ -500,7 +500,7 @@ static u32 ki2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm ki2c_algo = { - .master_xfer = ki2c_xfer, + .xfer = ki2c_xfer, .functionality = ki2c_func, }; diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c index 5ef136c3ecb1..bc0f1a0c8ee1 100644 --- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c @@ -1048,7 +1048,7 @@ static u32 pci1xxxx_i2c_get_funcs(struct i2c_adapter *adap) } static const struct i2c_algorithm pci1xxxx_i2c_algo = { - .master_xfer = 
pci1xxxx_i2c_xfer, + .xfer = pci1xxxx_i2c_xfer, .functionality = pci1xxxx_i2c_get_funcs, }; diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index e1d69537353b..0d9032953e48 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -448,8 +448,8 @@ static u32 meson_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm meson_i2c_algorithm = { - .master_xfer = meson_i2c_xfer, - .master_xfer_atomic = meson_i2c_xfer_atomic, + .xfer = meson_i2c_xfer, + .xfer_atomic = meson_i2c_xfer_atomic, .functionality = meson_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index 492bf4c34722..c8599733633e 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -435,6 +435,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned u8 tx_buf[I2C_SMBUS_BLOCK_MAX + 2]; u8 rx_buf[I2C_SMBUS_BLOCK_MAX + 1]; int num_msgs = 1; + int ret; msgs[CORE_I2C_SMBUS_MSG_WR].addr = addr; msgs[CORE_I2C_SMBUS_MSG_WR].flags = 0; @@ -505,7 +506,10 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned return -EOPNOTSUPP; } - mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + ret = mchp_corei2c_xfer(&idev->adapter, msgs, num_msgs); + if (ret < 0) + return ret; + if (read_write == I2C_SMBUS_WRITE || size <= I2C_SMBUS_BYTE_DATA) return 0; @@ -526,7 +530,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned } static const struct i2c_algorithm mchp_corei2c_algo = { - .master_xfer = mchp_corei2c_xfer, + .xfer = mchp_corei2c_xfer, .functionality = mchp_corei2c_func, .smbus_xfer = mchp_corei2c_smbus_xfer, }; diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 5bd342047d59..ab456c3717db 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -1342,7 +1342,7 @@ static u32 
mtk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm mtk_i2c_algorithm = { - .master_xfer = mtk_i2c_transfer, + .xfer = mtk_i2c_transfer, .functionality = mtk_i2c_functionality, }; diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index ad62d56b2186..08c9091a1e35 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -687,7 +687,7 @@ static irqreturn_t mxs_i2c_isr(int this_irq, void *dev_id) } static const struct i2c_algorithm mxs_i2c_algo = { - .master_xfer = mxs_i2c_xfer, + .xfer = mxs_i2c_xfer, .functionality = mxs_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index d2877e4cc28d..19b648fc094d 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -996,8 +996,8 @@ static unsigned int nmk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm nmk_i2c_algo = { - .master_xfer = nmk_i2c_xfer, - .functionality = nmk_i2c_functionality + .xfer = nmk_i2c_xfer, + .functionality = nmk_i2c_functionality }; static void nmk_i2c_of_probe(struct device_node *np, diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 892e2d2988a7..8b7e15240fb0 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2470,11 +2470,11 @@ static const struct i2c_adapter_quirks npcm_i2c_quirks = { }; static const struct i2c_algorithm npcm_i2c_algo = { - .master_xfer = npcm_i2c_master_xfer, + .xfer = npcm_i2c_master_xfer, .functionality = npcm_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = npcm_i2c_reg_slave, - .unreg_slave = npcm_i2c_unreg_slave, + .reg_slave = npcm_i2c_reg_slave, + .unreg_slave = npcm_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 876791d20ed5..8b01df3cc8e9 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1201,9 +1201,9 @@ 
omap_i2c_isr_thread(int this_irq, void *dev_id) } static const struct i2c_algorithm omap_i2c_algo = { - .master_xfer = omap_i2c_xfer_irq, - .master_xfer_atomic = omap_i2c_xfer_polling, - .functionality = omap_i2c_func, + .xfer = omap_i2c_xfer_irq, + .xfer_atomic = omap_i2c_xfer_polling, + .functionality = omap_i2c_func, }; static const struct i2c_adapter_quirks omap_i2c_quirks = { @@ -1461,13 +1461,13 @@ omap_i2c_probe(struct platform_device *pdev) if (IS_ERR(mux_state)) { r = PTR_ERR(mux_state); dev_dbg(&pdev->dev, "failed to get I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } omap->mux_state = mux_state; r = mux_state_select(omap->mux_state); if (r) { dev_err(&pdev->dev, "failed to select I2C mux: %d\n", r); - goto err_disable_pm; + goto err_put_pm; } } @@ -1515,6 +1515,9 @@ omap_i2c_probe(struct platform_device *pdev) err_unuse_clocks: omap_i2c_write_reg(omap, OMAP_I2C_CON_REG, 0); + if (omap->mux_state) + mux_state_deselect(omap->mux_state); +err_put_pm: pm_runtime_dont_use_autosuspend(omap->dev); pm_runtime_put_sync(omap->dev); err_disable_pm: diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 9d3a4dc2bd60..ac3bb550303f 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -34,7 +34,7 @@ #include <linux/dmi.h> #include <linux/acpi.h> #include <linux/io.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "i2c-piix4.h" diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 9a1af5bbd604..8daa0008bd05 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -580,7 +580,7 @@ static u32 i2c_pnx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm pnx_algorithm = { - .master_xfer = i2c_pnx_xfer, + .xfer = i2c_pnx_xfer, .functionality = i2c_pnx_func, }; diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 4415a29f749b..968a8b8794da 100644 --- 
a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -1154,11 +1154,11 @@ static u32 i2c_pxa_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm i2c_pxa_algorithm = { - .master_xfer = i2c_pxa_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; @@ -1244,11 +1244,11 @@ static int i2c_pxa_pio_xfer(struct i2c_adapter *adap, } static const struct i2c_algorithm i2c_pxa_pio_algorithm = { - .master_xfer = i2c_pxa_pio_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_pio_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index 05b73326afd4..a3afa11a71a1 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -462,8 +462,8 @@ static u32 cci_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cci_algo = { - .master_xfer = cci_xfer, - .functionality = cci_func, + .xfer = cci_xfer, + .functionality = cci_func, }; static int cci_enable_clocks(struct cci *cci) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index ccea575fb783..13889f52b6f7 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -727,8 +727,8 @@ static u32 geni_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm geni_i2c_algo = { - .master_xfer = geni_i2c_xfer, - .functionality = geni_i2c_func, + .xfer = geni_i2c_xfer, + .functionality = geni_i2c_func, }; #ifdef CONFIG_ACPI diff --git a/drivers/i2c/busses/i2c-qup.c 
b/drivers/i2c/busses/i2c-qup.c index 3a36d682ed57..6059f585843e 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1634,13 +1634,13 @@ static u32 qup_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm qup_i2c_algo = { - .master_xfer = qup_i2c_xfer, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer, + .functionality = qup_i2c_func, }; static const struct i2c_algorithm qup_i2c_algo_v2 = { - .master_xfer = qup_i2c_xfer_v2, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer_v2, + .functionality = qup_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 5693a38da7b5..d51884ab99f4 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -1084,11 +1084,11 @@ static u32 rcar_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm rcar_i2c_algo = { - .master_xfer = rcar_i2c_master_xfer, - .master_xfer_atomic = rcar_i2c_master_xfer_atomic, - .functionality = rcar_i2c_func, - .reg_slave = rcar_reg_slave, - .unreg_slave = rcar_unreg_slave, + .xfer = rcar_i2c_master_xfer, + .xfer_atomic = rcar_i2c_master_xfer_atomic, + .functionality = rcar_i2c_func, + .reg_slave = rcar_reg_slave, + .unreg_slave = rcar_unreg_slave, }; static const struct i2c_adapter_quirks rcar_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index 80d45079b763..e0a76fb5bc31 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -111,6 +111,11 @@ static u32 osif_func(struct i2c_adapter *adapter) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks osif_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + static const struct i2c_algorithm osif_algorithm = { .xfer = osif_xfer, .functionality = osif_func, @@ -143,6 +148,7 @@ static int osif_probe(struct usb_interface *interface, 
priv->adapter.owner = THIS_MODULE; priv->adapter.class = I2C_CLASS_HWMON; + priv->adapter.quirks = &osif_quirks; priv->adapter.algo = &osif_algorithm; priv->adapter.algo_data = priv; snprintf(priv->adapter.name, sizeof(priv->adapter.name), diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 0f3cf500df68..f4fa4703acbd 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -800,9 +800,9 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) /* i2c bus registration info */ static const struct i2c_algorithm s3c24xx_i2c_algorithm = { - .master_xfer = s3c24xx_i2c_xfer, - .master_xfer_atomic = s3c24xx_i2c_xfer_atomic, - .functionality = s3c24xx_i2c_func, + .xfer = s3c24xx_i2c_xfer, + .xfer_atomic = s3c24xx_i2c_xfer_atomic, + .functionality = s3c24xx_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index 620f12596763..43f33988b98f 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -379,8 +379,8 @@ static u32 sh7760_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm sh7760_i2c_algo = { - .master_xfer = sh7760_i2c_master_xfer, - .functionality = sh7760_i2c_func, + .xfer = sh7760_i2c_master_xfer, + .functionality = sh7760_i2c_func, }; /* calculate CCR register setting for a desired scl clock. 
SCL clock is diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index adfcee6c9fdc..dae8967f8749 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -740,8 +740,8 @@ static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter) static const struct i2c_algorithm sh_mobile_i2c_algorithm = { .functionality = sh_mobile_i2c_func, - .master_xfer = sh_mobile_i2c_xfer, - .master_xfer_atomic = sh_mobile_i2c_xfer_atomic, + .xfer = sh_mobile_i2c_xfer, + .xfer_atomic = sh_mobile_i2c_xfer_atomic, }; static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 973a3a8c6d4a..e4aaeb2262d0 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2151,8 +2151,8 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm stm32f7_i2c_algo = { - .master_xfer = stm32f7_i2c_xfer, - .master_xfer_atomic = stm32f7_i2c_xfer_atomic, + .xfer = stm32f7_i2c_xfer, + .xfer_atomic = stm32f7_i2c_xfer_atomic, .smbus_xfer = stm32f7_i2c_smbus_xfer, .functionality = stm32f7_i2c_func, .reg_slave = stm32f7_i2c_reg_slave, diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index 31f8d08e32a4..1230f51e1624 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -520,8 +520,8 @@ static u32 synquacer_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm synquacer_i2c_algo = { - .master_xfer = synquacer_i2c_xfer, - .functionality = synquacer_i2c_functionality, + .xfer = synquacer_i2c_xfer, + .functionality = synquacer_i2c_functionality, }; static const struct i2c_adapter synquacer_i2c_ops = { diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 049b4d154c23..0862b98007f5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1440,9 
+1440,9 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm tegra_i2c_algo = { - .master_xfer = tegra_i2c_xfer, - .master_xfer_atomic = tegra_i2c_xfer_atomic, - .functionality = tegra_i2c_func, + .xfer = tegra_i2c_xfer, + .xfer_atomic = tegra_i2c_xfer_atomic, + .functionality = tegra_i2c_func, }; /* payload size is only 12 bit */ diff --git a/drivers/i2c/busses/i2c-tiny-usb.c b/drivers/i2c/busses/i2c-tiny-usb.c index a18eab0992a1..57dfe5f1a7d9 100644 --- a/drivers/i2c/busses/i2c-tiny-usb.c +++ b/drivers/i2c/busses/i2c-tiny-usb.c @@ -139,6 +139,11 @@ out: return ret; } +/* prevent invalid 0-length usb_control_msg */ +static const struct i2c_adapter_quirks usb_quirks = { + .flags = I2C_AQ_NO_ZERO_LEN_READ, +}; + /* This is the actual algorithm we define */ static const struct i2c_algorithm usb_algorithm = { .xfer = usb_xfer, @@ -247,6 +252,7 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface, /* setup i2c adapter description */ dev->adapter.owner = THIS_MODULE; dev->adapter.class = I2C_CLASS_HWMON; + dev->adapter.quirks = &usb_quirks; dev->adapter.algo = &usb_algorithm; dev->adapter.algo_data = dev; snprintf(dev->adapter.name, sizeof(dev->adapter.name), diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 6bc1575cea6c..607026c921d6 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1398,8 +1398,8 @@ static u32 xiic_func(struct i2c_adapter *adap) } static const struct i2c_algorithm xiic_algorithm = { - .master_xfer = xiic_xfer, - .master_xfer_atomic = xiic_xfer_atomic, + .xfer = xiic_xfer, + .xfer_atomic = xiic_xfer_atomic, .functionality = xiic_func, }; diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c index 4d5e49b6321b..ddb1c3e8bc9d 100644 --- a/drivers/i2c/busses/i2c-xlp9xx.c +++ b/drivers/i2c/busses/i2c-xlp9xx.c @@ -452,7 +452,7 @@ static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter) } static const struct 
i2c_algorithm xlp9xx_i2c_algo = { - .master_xfer = xlp9xx_i2c_xfer, + .xfer = xlp9xx_i2c_xfer, .functionality = xlp9xx_i2c_functionality, }; diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index be7d6d41e0b2..dd194476b118 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -738,7 +738,7 @@ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, atr->flags = flags; if (parent->algo->master_xfer) - atr->algo.master_xfer = i2c_atr_master_xfer; + atr->algo.xfer = i2c_atr_master_xfer; if (parent->algo->smbus_xfer) atr->algo.smbus_xfer = i2c_atr_smbus_xfer; atr->algo.functionality = i2c_atr_functionality; diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index fda72e8be885..4d8690981a55 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -293,12 +293,12 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, */ if (parent->algo->master_xfer) { if (muxc->mux_locked) - priv->algo.master_xfer = i2c_mux_master_xfer; + priv->algo.xfer = i2c_mux_master_xfer; else - priv->algo.master_xfer = __i2c_mux_master_xfer; + priv->algo.xfer = __i2c_mux_master_xfer; } if (parent->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = priv->algo.master_xfer; + priv->algo.xfer_atomic = priv->algo.master_xfer; if (parent->algo->smbus_xfer) { if (muxc->mux_locked) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 77a740561fd7..f2a1f4744978 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -95,9 +95,9 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne priv->cur_chan = new_chan; /* Now fill out current adapter structure. 
cur_chan must be up to date */ - priv->algo.master_xfer = i2c_demux_master_xfer; + priv->algo.xfer = i2c_demux_master_xfer; if (adap->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = i2c_demux_master_xfer; + priv->algo.xfer_atomic = i2c_demux_master_xfer; priv->algo.functionality = i2c_demux_functionality; snprintf(priv->cur_adap.name, sizeof(priv->cur_adap.name), diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9979a351577f..81cf3c902e81 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port, out_unlock: mutex_unlock(&table->lock); if (ret) - pr_warn("%s: unable to add gid %pI6 error=%d\n", - __func__, gid->raw, ret); + pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n", + __func__, gid->raw, ret); return ret; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index c752ae9fad6c..b1c44ec1a3f3 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,6 +76,17 @@ static int ib_init_umem_odp(struct ib_umem_odp *umem_odp, end = ALIGN(end, page_size); if (unlikely(end < page_size)) return -EOVERFLOW; + /* + * The mmu notifier can be called within reclaim contexts and takes the + * umem_mutex. This is rare to trigger in testing, teach lockdep about + * it. 
+ */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + mutex_lock(&umem_odp->umem_mutex); + mutex_unlock(&umem_odp->umem_mutex); + fs_reclaim_release(GFP_KERNEL); + } nr_entries = (end - start) >> PAGE_SHIFT; if (!(nr_entries * PAGE_SIZE / page_size)) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index b847084dcd99..a506fafd2b15 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -398,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -418,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2479da8620ca..843dcd312242 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1958,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1966,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; @@ -2669,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd) void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) { - struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS]; + struct mlx5_async_cmd *async_cmd; struct ib_ucontext *ucontext = 
ufile->ucontext; struct ib_device *device = ucontext->device; struct mlx5_ib_dev *dev = to_mdev(device); @@ -2678,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) int head = 0; int tail = 0; + async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL); + if (!async_cmd) + return; + list_for_each_entry(uobject, &ufile->uobjects, list) { WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE)); @@ -2713,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile) devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]); head++; } + + kfree(async_cmd); } static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ce7610740412..df6557ddbdfc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1791,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3495,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3590,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, 
MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 57f9bc2a4a3a..bd35e75d9ce5 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2027,23 +2027,50 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; - bool is_odp = is_odp_mr(mr); bool is_odp_dma_buf = is_dmabuf_mr(mr) && - !to_ib_umem_dmabuf(mr->umem)->pinned; - bool from_cache = !!ent; - int ret = 0; + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); if (is_odp_dma_buf) - dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); + + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; + + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { 
ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); @@ -2055,7 +2082,7 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) ent->tmp_cleanup_scheduled = true; } spin_unlock_irq(&ent->mkeys_queue.lock); - goto out; + return 0; } if (ent) { @@ -2064,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); ret = destroy_mkey(dev, mr); -out: if (is_odp) { if (!ret) to_ib_umem_odp(mr->umem)->private = NULL; @@ -2075,9 +2108,9 @@ out: if (is_odp_dma_buf) { if (!ret) to_ib_umem_dmabuf(mr->umem)->private = NULL; - dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); } - return ret; } @@ -2126,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index eaa2f9f5f3a9..f6abd64f07f7 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -259,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) } if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) - __xa_erase(&mr_to_mdev(mr)->odp_mkeys, - mlx5_base_mkey(mr->mmkey.key)); + xa_erase(&mr_to_mdev(mr)->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key)); xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ @@ -532,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, } if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { - ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, 
GFP_KERNEL); + ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), + &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); __xa_erase(&imr->implicit_children, idx); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1378651735f6..23ed2fc688f0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3705,9 +3705,10 @@ static ssize_t add_target_store(struct device *dev, target_host->max_id = 1; target_host->max_lun = -1LL; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; - target_host->max_segment_size = ib_dma_max_seg_size(ibdev); - if (!(ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)) + if (ibdev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + target_host->max_segment_size = ib_dma_max_seg_size(ibdev); + else target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; target = host_to_target(target_host); diff --git a/drivers/input/joystick/fsia6b.c b/drivers/input/joystick/fsia6b.c index 76ffdec5c183..7e3bc99d766f 100644 --- a/drivers/input/joystick/fsia6b.c +++ b/drivers/input/joystick/fsia6b.c @@ -149,7 +149,7 @@ static int fsia6b_serio_connect(struct serio *serio, struct serio_driver *drv) } fsia6b->dev = input_dev; - snprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); + scnprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys); input_dev->name = DRIVER_DESC; input_dev->phys = fsia6b->phys; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index c066a4da7c14..5d9b7007a730 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -177,6 +177,7 @@ static const struct xpad_device { { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, + { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox 
GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, @@ -524,6 +525,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ + XPAD_XBOX360_VENDOR(0x0502), /* Acer Inc. Xbox 360 style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ @@ -1344,11 +1346,12 @@ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { - dev_err(&xpad->intf->dev, - "%s - usb_submit_urb failed with result %d\n", - __func__, error); + if (error != -ENODEV) + dev_err(&xpad->intf->dev, + "%s - usb_submit_urb failed with result %d\n", + __func__, error); usb_unanchor_urb(xpad->irq_out); - return -EIO; + return error; } xpad->irq_out_active = true; diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 3ff2fcf05ad5..c9e1127578b9 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1191,8 +1191,8 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd) "AT %s Set %d keyboard", atkbd->translated ? 
"Translated" : "Raw", atkbd->set); - snprintf(atkbd->phys, sizeof(atkbd->phys), - "%s/input0", atkbd->ps2dev.serio->phys); + scnprintf(atkbd->phys, sizeof(atkbd->phys), + "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; diff --git a/drivers/input/misc/cs40l50-vibra.c b/drivers/input/misc/cs40l50-vibra.c index dce3b0ec8cf3..330f09123631 100644 --- a/drivers/input/misc/cs40l50-vibra.c +++ b/drivers/input/misc/cs40l50-vibra.c @@ -238,6 +238,8 @@ static int cs40l50_upload_owt(struct cs40l50_work *work_data) header.data_words = len / sizeof(u32); new_owt_effect_data = kmalloc(sizeof(header) + len, GFP_KERNEL); + if (!new_owt_effect_data) + return -ENOMEM; memcpy(new_owt_effect_data, &header, sizeof(header)); memcpy(new_owt_effect_data + sizeof(header), work_data->custom_data, len); diff --git a/drivers/input/misc/gpio-beeper.c b/drivers/input/misc/gpio-beeper.c index d2d2954e2f79..3d65cb4f4ef3 100644 --- a/drivers/input/misc/gpio-beeper.c +++ b/drivers/input/misc/gpio-beeper.c @@ -94,7 +94,7 @@ static int gpio_beeper_probe(struct platform_device *pdev) #ifdef CONFIG_OF static const struct of_device_id gpio_beeper_of_match[] = { - { .compatible = BEEPER_MODNAME, }, + { .compatible = "gpio-beeper", }, { } }; MODULE_DEVICE_TABLE(of, gpio_beeper_of_match); diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index 7a6e6927f331..7fba4a8edceb 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -771,7 +771,7 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, u8 *thresh = &sys_reg->tp_grp_reg.ch_reg_tp[i].thresh; char tc_name[10]; - snprintf(tc_name, sizeof(tc_name), "channel-%d", i); + scnprintf(tc_name, sizeof(tc_name), "channel-%d", i); struct fwnode_handle *tc_node __free(fwnode_handle) = fwnode_get_named_child_node(ch_node, tc_name); diff --git a/drivers/input/misc/iqs7222.c b/drivers/input/misc/iqs7222.c index 80b917944b51..6fac31c0d99f 100644 --- 
a/drivers/input/misc/iqs7222.c +++ b/drivers/input/misc/iqs7222.c @@ -301,6 +301,7 @@ struct iqs7222_dev_desc { int allow_offset; int event_offset; int comms_offset; + int ext_chan; bool legacy_gesture; struct iqs7222_reg_grp_desc reg_grps[IQS7222_NUM_REG_GRPS]; }; @@ -315,6 +316,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .reg_grps = { [IQS7222_REG_GRP_STAT] = { .base = IQS7222_SYS_STATUS, @@ -373,6 +375,7 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = { .allow_offset = 9, .event_offset = 10, .comms_offset = 12, + .ext_chan = 10, .legacy_gesture = true, .reg_grps = { [IQS7222_REG_GRP_STAT] = { @@ -2244,7 +2247,7 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? : num_chan; int error, i; u16 *chan_setup = iqs7222->chan_setup[chan_index]; u16 *sys_setup = iqs7222->sys_setup; @@ -2445,7 +2448,7 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222, const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc; struct i2c_client *client = iqs7222->client; int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row; - int ext_chan = rounddown(num_chan, 10); + int ext_chan = dev_desc->ext_chan ? 
: num_chan; int count, error, reg_offset, i; u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset]; u16 *sldr_setup = iqs7222->sldr_setup[sldr_index]; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index be734d65ea72..d0cb9fb94821 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1408,9 +1408,9 @@ static int alps_do_register_bare_ps2_mouse(struct alps_data *priv) return -ENOMEM; } - snprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", - psmouse->ps2dev.serio->phys, - (priv->dev2 ? "input2" : "input1")); + scnprintf(priv->phys3, sizeof(priv->phys3), "%s/%s", + psmouse->ps2dev.serio->phys, + (priv->dev2 ? "input2" : "input1")); dev3->phys = priv->phys3; /* @@ -3103,8 +3103,8 @@ int alps_init(struct psmouse *psmouse) goto init_fail; } - snprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", - psmouse->ps2dev.serio->phys); + scnprintf(priv->phys2, sizeof(priv->phys2), "%s/input1", + psmouse->ps2dev.serio->phys); dev2->phys = priv->phys2; /* diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index 7147dacc404f..283ef46f039f 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -279,8 +279,8 @@ static int lifebook_create_relative_device(struct psmouse *psmouse) goto err_out; priv->dev2 = dev2; - snprintf(priv->phys, sizeof(priv->phys), - "%s/input1", psmouse->ps2dev.serio->phys); + scnprintf(priv->phys, sizeof(priv->phys), + "%s/input1", psmouse->ps2dev.serio->phys); dev2->phys = priv->phys; dev2->name = "LBPS/2 Fujitsu Lifebook Touchpad"; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index a2c9f7144864..77ea7da3b1c5 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1600,7 +1600,7 @@ static int psmouse_connect(struct serio *serio, struct serio_driver *drv) psmouse_pre_receive_byte, psmouse_receive_byte); INIT_DELAYED_WORK(&psmouse->resync_work, psmouse_resync); psmouse->dev = 
input_dev; - snprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); + scnprintf(psmouse->phys, sizeof(psmouse->phys), "%s/input0", serio->phys); psmouse_set_state(psmouse, PSMOUSE_INITIALIZING); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 91a2b584dab1..196905162945 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -105,7 +105,6 @@ config TOUCHSCREEN_ADC config TOUCHSCREEN_APPLE_Z2 tristate "Apple Z2 touchscreens" - default ARCH_APPLE depends on SPI && (ARCH_APPLE || COMPILE_TEST) help Say Y here if you have an ARM Apple device with diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index a6946e3d8376..869884219908 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -1554,7 +1554,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(mip4_pm_ops, mip4_suspend, mip4_resume); #ifdef CONFIG_OF static const struct of_device_id mip4_of_match[] = { - { .compatible = "melfas,"MIP4_DEVICE_NAME, }, + { .compatible = "melfas,mip4_ts", }, { }, }; MODULE_DEVICE_TABLE(of, mip4_of_match); diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..47692cbfaabd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. 
*/ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..148b944143b8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3780,8 +3780,17 @@ static void intel_iommu_probe_finalize(struct device *dev) !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } iommu_enable_pci_pri(info); } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 3ddbcc603de2..2d1afab5eedc 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1289,6 +1289,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 22f74ba33a0e..e6bb3c784017 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1157,7 +1157,6 @@ static int rk_iommu_of_xlate(struct device *dev, return -ENOMEM; data->iommu = 
platform_get_drvdata(iommu_dev); - data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); platform_device_put(iommu_dev); @@ -1195,6 +1194,8 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!iommu) return -ENOMEM; + iommu->domain = &rk_identity_domain; + platform_set_drvdata(pdev, iommu); iommu->dev = dev; iommu->num_mmu = 0; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 0d196e447142..c3928ef79344 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -74,6 +74,7 @@ config ARM_VIC_NR config IRQ_MSI_LIB bool + select GENERIC_MSI_IRQ config ARMADA_370_XP_IRQ bool diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index 268cc18b781f..258b8e9a2d57 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -15,6 +15,8 @@ #include <linux/of_address.h> #include <linux/of_irq.h> +#include <asm/time.h> + #define AR71XX_RESET_REG_MISC_INT_STATUS 0 #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 @@ -177,21 +179,3 @@ static int __init ar7240_misc_intc_of_init( IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", ar7240_misc_intc_of_init); - -void __init ath79_misc_irq_init(void __iomem *regs, int irq, - int irq_base, bool is_ar71xx) -{ - struct irq_domain *domain; - - if (is_ar71xx) - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - else - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - - domain = irq_domain_create_legacy(NULL, ATH79_MISC_IRQ_COUNT, - irq_base, 0, &misc_irq_domain_ops, regs); - if (!domain) - panic("Failed to create MISC irqdomain"); - - ath79_misc_intc_domain_init(domain, irq); -} diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index d4697e79d5a3..b2d10063d35f 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,7 +5,6 @@ config BCACHE select BLOCK_HOLDER_DEPRECATED if SYSFS select CRC64 select CLOSURES - select MIN_HEAP help Allows a block device to be used as cache for 
other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8998e61efa40..48ce750bf70a 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -164,61 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b) * prio is worth 1/8th of what INITIAL_PRIO is worth. */ -static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b) -{ - unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; - - return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); -} - -static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; - - return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs); -} - -static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args) -{ - struct bucket **lhs = (struct bucket **)l; - struct bucket **rhs = (struct bucket **)r; - struct cache *ca = args; +#define bucket_prio(b) \ +({ \ + unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \ + \ + (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \ +}) - return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs); -} +#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r)) +#define bucket_min_cmp(l, r) (bucket_prio(l) > bucket_prio(r)) static void invalidate_buckets_lru(struct cache *ca) { struct bucket *b; - const struct min_heap_callbacks bucket_max_cmp_callback = { - .less = new_bucket_max_cmp, - .swp = NULL, - }; - const struct min_heap_callbacks bucket_min_cmp_callback = { - .less = new_bucket_min_cmp, - .swp = NULL, - }; + ssize_t i; - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!min_heap_full(&ca->heap)) - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); - else 
if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { + if (!heap_full(&ca->heap)) + heap_add(&ca->heap, b, bucket_max_cmp); + else if (bucket_max_cmp(b, heap_peek(&ca->heap))) { ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); + heap_sift(&ca->heap, 0, bucket_max_cmp); } } - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); + for (i = ca->heap.used / 2 - 1; i >= 0; --i) + heap_sift(&ca->heap, i, bucket_min_cmp); while (!fifo_full(&ca->free_inc)) { - if (!ca->heap.nr) { + if (!heap_pop(&ca->heap, b, bucket_min_cmp)) { /* * We don't want to be calling invalidate_buckets() * multiple times when it can't do anything @@ -227,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 785b0d9008fa..1d33e40d26ea 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -458,7 +458,7 @@ struct cache { /* Allocation stuff: */ struct bucket *buckets; - DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap; + DECLARE_HEAP(struct bucket *, heap); /* * If nonzero, we know we aren't going to find any buckets to invalidate diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 68258a16e125..463eb13bd0b2 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b) int __bch_count_data(struct btree_keys *b) { unsigned int ret = 0; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - if (b->ops->is_extents) for_each_key(b, k, &iter) ret += KEY_SIZE(k); @@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) 
{ va_list args; struct bkey *k, *p = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; const char *err; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key(b, k, &iter) { if (b->ops->is_extents) { err = "Keys out of order"; @@ -114,9 +110,9 @@ bug: static void bch_btree_iter_next_check(struct btree_iter *iter) { - struct bkey *k = iter->heap.data->k, *next = bkey_next(k); + struct bkey *k = iter->data->k, *next = bkey_next(k); - if (next < iter->heap.data->end && + if (next < iter->data->end && bkey_cmp(k, iter->b->ops->is_extents ? &START_KEY(next) : next) > 0) { bch_dump_bucket(iter->b); @@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, unsigned int status = BTREE_INSERT_STATUS_NO_INSERT; struct bset *i = bset_tree_last(b)->data; struct bkey *m, *prev = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey preceding_key_on_stack = ZERO_KEY; struct bkey *preceding_key_p = &preceding_key_on_stack; BUG_ON(b->ops->is_extents && !KEY_SIZE(k)); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* * If k has preceding key, preceding_key_p will be set to address * of k's preceding key; otherwise preceding_key_p will be set @@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k, else preceding_key(k, &preceding_key_p); - m = bch_btree_iter_init(b, &iter, preceding_key_p); + m = bch_btree_iter_stack_init(b, &iter, preceding_key_p); - if (b->ops->insert_fixup(b, k, &iter, replace_key)) + if (b->ops->insert_fixup(b, k, &iter.iter, replace_key)) return status; status = BTREE_INSERT_STATUS_INSERT; @@ -1083,94 +1077,79 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, /* Btree iterator */ -typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *); +typedef bool (btree_iter_cmp_fn)(struct btree_iter_set, + struct btree_iter_set); -static inline bool new_btree_iter_cmp(const void *l, const void *r, void 
__always_unused *args) +static inline bool btree_iter_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - const struct btree_iter_set *_l = l; - const struct btree_iter_set *_r = r; - - return bkey_cmp(_l->k, _r->k) <= 0; + return bkey_cmp(l.k, r.k) > 0; } static inline bool btree_iter_end(struct btree_iter *iter) { - return !iter->heap.nr; + return !iter->used; } void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end) { - const struct min_heap_callbacks callbacks = { - .less = new_btree_iter_cmp, - .swp = NULL, - }; - if (k != end) - BUG_ON(!min_heap_push(&iter->heap, - &((struct btree_iter_set) { k, end }), - &callbacks, - NULL)); + BUG_ON(!heap_add(iter, + ((struct btree_iter_set) { k, end }), + btree_iter_cmp)); } -static struct bkey *__bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search, - struct bset_tree *start) +static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search, + struct bset_tree *start) { struct bkey *ret = NULL; - iter->heap.size = ARRAY_SIZE(iter->heap.preallocated); - iter->heap.nr = 0; + iter->iter.size = ARRAY_SIZE(iter->stack_data); + iter->iter.used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter->b = b; + iter->iter.b = b; #endif for (; start <= bset_tree_last(b); start++) { ret = bch_bset_search(b, start, search); - bch_btree_iter_push(iter, ret, bset_bkey_last(start->data)); + bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data)); } return ret; } -struct bkey *bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, struct bkey *search) { - return __bch_btree_iter_init(b, iter, search, b->set); + return __bch_btree_iter_stack_init(b, iter, search, b->set); } static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, - new_btree_iter_cmp_fn *cmp) + btree_iter_cmp_fn *cmp) { 
struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; - const struct min_heap_callbacks callbacks = { - .less = cmp, - .swp = NULL, - }; if (!btree_iter_end(iter)) { bch_btree_iter_next_check(iter); - ret = iter->heap.data->k; - iter->heap.data->k = bkey_next(iter->heap.data->k); + ret = iter->data->k; + iter->data->k = bkey_next(iter->data->k); - if (iter->heap.data->k > iter->heap.data->end) { + if (iter->data->k > iter->data->end) { WARN_ONCE(1, "bset was corrupt!\n"); - iter->heap.data->k = iter->heap.data->end; + iter->data->k = iter->data->end; } - if (iter->heap.data->k == iter->heap.data->end) { - if (iter->heap.nr) { - b = min_heap_peek(&iter->heap)[0]; - min_heap_pop(&iter->heap, &callbacks, NULL); - } - } + if (iter->data->k == iter->data->end) + heap_pop(iter, b, cmp); else - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, cmp); } return ret; @@ -1178,7 +1157,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, struct bkey *bch_btree_iter_next(struct btree_iter *iter) { - return __bch_btree_iter_next(iter, new_btree_iter_cmp); + return __bch_btree_iter_next(iter, btree_iter_cmp); } @@ -1216,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, struct btree_iter *iter, bool fixup, bool remove_stale) { + int i; struct bkey *k, *last = NULL; BKEY_PADDED(k) tmp; bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale ? 
bch_ptr_bad : bch_ptr_invalid; - const struct min_heap_callbacks callbacks = { - .less = b->ops->sort_cmp, - .swp = NULL, - }; /* Heapify the iterator, using our comparison function */ - min_heapify_all(&iter->heap, &callbacks, NULL); + for (i = iter->used / 2 - 1; i >= 0; --i) + heap_sift(iter, i, b->ops->sort_cmp); while (!btree_iter_end(iter)) { if (b->ops->sort_fixup && fixup) @@ -1316,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, struct bset_sort_state *state) { size_t order = b->page_order, keys = 0; - struct btree_iter iter; + struct btree_iter_stack iter; int oldsize = bch_count_data(b); - min_heap_init(&iter.heap, NULL, MAX_BSETS); - __bch_btree_iter_init(b, &iter, NULL, &b->set[start]); + __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]); if (start) { unsigned int i; @@ -1331,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start, order = get_order(__set_bytes(b->set->data, keys)); } - __btree_sort(b, &iter, start, order, false, state); + __btree_sort(b, &iter.iter, start, order, false, state); EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize); } @@ -1347,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new, struct bset_sort_state *state) { uint64_t start_time = local_clock(); - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; - bch_btree_iter_init(b, &iter, NULL); + bch_btree_iter_stack_init(b, &iter, NULL); - btree_mergesort(b, new->set->data, &iter, false, true); + btree_mergesort(b, new->set->data, &iter.iter, false, true); bch_time_stats_update(&state->time, start_time); diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index f79441acd4c1..011f6062c4c0 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -187,9 +187,8 @@ struct bset_tree { }; struct btree_keys_ops { - bool (*sort_cmp)(const void *l, - const void *r, - void *args); + bool 
(*sort_cmp)(struct btree_iter_set l, + struct btree_iter_set r); struct bkey *(*sort_fixup)(struct btree_iter *iter, struct bkey *tmp); bool (*insert_fixup)(struct btree_keys *b, @@ -313,17 +312,23 @@ enum { BTREE_INSERT_STATUS_FRONT_MERGE, }; -struct btree_iter_set { - struct bkey *k, *end; -}; - /* Btree key iteration */ struct btree_iter { + size_t size, used; #ifdef CONFIG_BCACHE_DEBUG struct btree_keys *b; #endif - MIN_HEAP_PREALLOCATED(struct btree_iter_set, btree_iter_heap, MAX_BSETS) heap; + struct btree_iter_set { + struct bkey *k, *end; + } data[]; +}; + +/* Fixed-size btree_iter that can be allocated on the stack */ + +struct btree_iter_stack { + struct btree_iter iter; + struct btree_iter_set stack_data[MAX_BSETS]; }; typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k); @@ -335,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter, void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k, struct bkey *end); -struct bkey *bch_btree_iter_init(struct btree_keys *b, - struct btree_iter *iter, - struct bkey *search); +struct bkey *bch_btree_iter_stack_init(struct btree_keys *b, + struct btree_iter_stack *iter, + struct bkey *search); struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t, const struct bkey *search); @@ -352,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b, return search ? 
__bch_bset_search(b, t, search) : t->data->start; } -#define for_each_key_filter(b, k, iter, filter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next_filter((iter), (b), filter));) +#define for_each_key_filter(b, k, stack_iter, filter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \ + filter));) -#define for_each_key(b, k, iter) \ - for (bch_btree_iter_init((b), (iter), NULL); \ - ((k) = bch_btree_iter_next(iter));) +#define for_each_key(b, k, stack_iter) \ + for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \ + ((k) = bch_btree_iter_next(&((stack_iter)->iter)));) /* Sorting */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 1d0100677357..210b59007d98 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -148,19 +148,19 @@ void bch_btree_node_read_done(struct btree *b) { const char *err = "bad btree header"; struct bset *i = btree_bset_first(b); - struct btree_iter iter; + struct btree_iter *iter; /* * c->fill_iter can allocate an iterator with more memory space * than static MAX_BSETS. * See the comment arount cache_set->fill_iter. 
*/ - iter.heap.data = mempool_alloc(&b->c->fill_iter, GFP_NOIO); - iter.heap.size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; - iter.heap.nr = 0; + iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO); + iter->size = b->c->cache->sb.bucket_size / b->c->cache->sb.block_size; + iter->used = 0; #ifdef CONFIG_BCACHE_DEBUG - iter.b = &b->keys; + iter->b = &b->keys; #endif if (!i->seq) @@ -198,7 +198,7 @@ void bch_btree_node_read_done(struct btree *b) if (i != b->keys.set[0].data && !i->keys) goto err; - bch_btree_iter_push(&iter, i->start, bset_bkey_last(i)); + bch_btree_iter_push(iter, i->start, bset_bkey_last(i)); b->written += set_blocks(i, block_bytes(b->c->cache)); } @@ -210,7 +210,7 @@ void bch_btree_node_read_done(struct btree *b) if (i->seq == b->keys.set[0].data->seq) goto err; - bch_btree_sort_and_fix_extents(&b->keys, &iter, &b->c->sort); + bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort); i = b->keys.set[0].data; err = "short btree key"; @@ -222,7 +222,7 @@ void bch_btree_node_read_done(struct btree *b) bch_bset_init_next(&b->keys, write_block(b), bset_magic(&b->c->cache->sb)); out: - mempool_free(iter.heap.data, &b->c->fill_iter); + mempool_free(iter, &b->c->fill_iter); return; err: set_btree_node_io_error(b); @@ -1306,11 +1306,9 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) uint8_t stale = 0; unsigned int keys = 0, good_keys = 0; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct bset_tree *t; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - gc->nodes++; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) { @@ -1569,11 +1567,9 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; unsigned int ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ 
-1612,18 +1608,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, int ret = 0; bool should_rewrite; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; struct gc_merge_info r[GC_MERGE_NODES]; struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done); + bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done); for (i = r; i < r + ARRAY_SIZE(r); i++) i->b = ERR_PTR(-EINTR); while (1) { - k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, true, b); @@ -1918,9 +1914,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) { int ret = 0; struct bkey *k, *p = NULL; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(b->c, b->level, k); @@ -1928,10 +1922,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) bch_initial_mark_key(b->c, b->level + 1, &b->key); if (b->level) { - bch_btree_iter_init(&b->keys, &iter, NULL); + bch_btree_iter_stack_init(&b->keys, &iter, NULL); do { - k = bch_btree_iter_next_filter(&iter, &b->keys, + k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad); if (k) { btree_node_prefetch(b, k); @@ -1959,7 +1953,7 @@ static int bch_btree_check_thread(void *arg) struct btree_check_info *info = arg; struct btree_check_state *check_state = info->state; struct cache_set *c = check_state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; @@ -1967,11 +1961,9 @@ static int bch_btree_check_thread(void *arg) cur_idx = prev_idx = 0; ret = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* root node keys are checked before thread 
created */ - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -1989,7 +1981,7 @@ static int bch_btree_check_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -2062,11 +2054,9 @@ int bch_btree_check(struct cache_set *c) int ret = 0; int i; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct btree_check_state check_state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - /* check and mark root node keys */ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid) bch_initial_mark_key(c, c->root->level, k); @@ -2560,12 +2550,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op, if (b->level) { struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, + while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, bch_ptr_bad))) { ret = bcache_btree(map_nodes_recurse, k, b, op, from, fn, flags); @@ -2594,12 +2583,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op, { int ret = MAP_CONTINUE; struct bkey *k; - struct btree_iter iter; + struct btree_iter_stack iter; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&b->keys, &iter, from); + bch_btree_iter_stack_init(&b->keys, &iter, from); - while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) { + while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys, + bch_ptr_bad))) { ret = !b->level ? 
fn(op, b, k) : bcache_btree(map_keys_recurse, k, diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 4b84fda1530a..d626ffcbecb9 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -33,16 +33,15 @@ static void sort_key_next(struct btree_iter *iter, i->k = bkey_next(i->k); if (i->k == i->end) - *i = iter->heap.data[--iter->heap.nr]; + *i = iter->data[--iter->used]; } -static bool new_bch_key_sort_cmp(const void *l, const void *r, void *args) +static bool bch_key_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(_l->k, _r->k); + int64_t c = bkey_cmp(l.k, r.k); - return !(c ? c > 0 : _l->k < _r->k); + return c ? c > 0 : l.k < r.k; } static bool __ptr_invalid(struct cache_set *c, const struct bkey *k) @@ -239,7 +238,7 @@ static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk, } const struct btree_keys_ops bch_btree_keys_ops = { - .sort_cmp = new_bch_key_sort_cmp, + .sort_cmp = bch_key_sort_cmp, .insert_fixup = bch_btree_ptr_insert_fixup, .key_invalid = bch_btree_ptr_invalid, .key_bad = bch_btree_ptr_bad, @@ -256,28 +255,22 @@ const struct btree_keys_ops bch_btree_keys_ops = { * Necessary for btree_sort_fixup() - if there are multiple keys that compare * equal in different sets, we have to process them newest to oldest. */ - -static bool new_bch_extent_sort_cmp(const void *l, const void *r, void __always_unused *args) +static bool bch_extent_sort_cmp(struct btree_iter_set l, + struct btree_iter_set r) { - struct btree_iter_set *_l = (struct btree_iter_set *)l; - struct btree_iter_set *_r = (struct btree_iter_set *)r; - int64_t c = bkey_cmp(&START_KEY(_l->k), &START_KEY(_r->k)); + int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k)); - return !(c ? c > 0 : _l->k < _r->k); + return c ? 
c > 0 : l.k < r.k; } static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, struct bkey *tmp) { - const struct min_heap_callbacks callbacks = { - .less = new_bch_extent_sort_cmp, - .swp = NULL, - }; - while (iter->heap.nr > 1) { - struct btree_iter_set *top = iter->heap.data, *i = top + 1; - - if (iter->heap.nr > 2 && - !new_bch_extent_sort_cmp(&i[0], &i[1], NULL)) + while (iter->used > 1) { + struct btree_iter_set *top = iter->data, *i = top + 1; + + if (iter->used > 2 && + bch_extent_sort_cmp(i[0], i[1])) i++; if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0) @@ -285,7 +278,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, if (!KEY_SIZE(i->k)) { sort_key_next(iter, i); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); continue; } @@ -295,7 +288,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, else bch_cut_front(top->k, i->k); - min_heap_sift_down(&iter->heap, i - top, &callbacks, NULL); + heap_sift(iter, i - top, bch_extent_sort_cmp); } else { /* can't happen because of comparison func */ BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k))); @@ -305,7 +298,7 @@ static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter, bch_cut_back(&START_KEY(i->k), tmp); bch_cut_front(i->k, top->k); - min_heap_sift_down(&iter->heap, 0, &callbacks, NULL); + heap_sift(iter, 0, bch_extent_sort_cmp); return tmp; } else { @@ -625,7 +618,7 @@ static bool bch_extent_merge(struct btree_keys *bk, } const struct btree_keys_ops bch_extent_keys_ops = { - .sort_cmp = new_bch_extent_sort_cmp, + .sort_cmp = bch_extent_sort_cmp, .sort_fixup = bch_extent_sort_fixup, .insert_fixup = bch_extent_insert_fixup, .key_invalid = bch_extent_invalid, diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 45ca134cbf02..26a6a535ec32 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -182,19 +182,16 @@ err: if 
(!IS_ERR_OR_NULL(w->private)) closure_sync(&cl); } -static bool new_bucket_cmp(const void *l, const void *r, void __always_unused *args) +static bool bucket_cmp(struct bucket *l, struct bucket *r) { - struct bucket **_l = (struct bucket **)l; - struct bucket **_r = (struct bucket **)r; - - return GC_SECTORS_USED(*_l) >= GC_SECTORS_USED(*_r); + return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); } static unsigned int bucket_heap_top(struct cache *ca) { struct bucket *b; - return (b = min_heap_peek(&ca->heap)[0]) ? GC_SECTORS_USED(b) : 0; + return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; } void bch_moving_gc(struct cache_set *c) @@ -202,10 +199,6 @@ void bch_moving_gc(struct cache_set *c) struct cache *ca = c->cache; struct bucket *b; unsigned long sectors_to_move, reserve_sectors; - const struct min_heap_callbacks callbacks = { - .less = new_bucket_cmp, - .swp = NULL, - }; if (!c->copy_gc_enabled) return; @@ -216,7 +209,7 @@ void bch_moving_gc(struct cache_set *c) reserve_sectors = ca->sb.bucket_size * fifo_used(&ca->free[RESERVE_MOVINGGC]); - ca->heap.nr = 0; + ca->heap.used = 0; for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || @@ -225,31 +218,25 @@ void bch_moving_gc(struct cache_set *c) atomic_read(&b->pin)) continue; - if (!min_heap_full(&ca->heap)) { + if (!heap_full(&ca->heap)) { sectors_to_move += GC_SECTORS_USED(b); - min_heap_push(&ca->heap, &b, &callbacks, NULL); - } else if (!new_bucket_cmp(&b, min_heap_peek(&ca->heap), ca)) { + heap_add(&ca->heap, b, bucket_cmp); + } else if (bucket_cmp(b, heap_peek(&ca->heap))) { sectors_to_move -= bucket_heap_top(ca); sectors_to_move += GC_SECTORS_USED(b); ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &callbacks, NULL); + heap_sift(&ca->heap, 0, bucket_cmp); } } while (sectors_to_move > reserve_sectors) { - if (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); - } + heap_pop(&ca->heap, b, bucket_cmp); sectors_to_move -= GC_SECTORS_USED(b); } 
- while (ca->heap.nr) { - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &callbacks, NULL); + while (heap_pop(&ca->heap, b, bucket_cmp)) SET_GC_MOVE(b, 1); - } mutex_unlock(&c->bucket_lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1efb768b2890..2ea490b9d370 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1912,7 +1912,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) INIT_LIST_HEAD(&c->btree_cache_freed); INIT_LIST_HEAD(&c->data_buckets); - iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * + iter_size = sizeof(struct btree_iter) + + ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) * sizeof(struct btree_iter_set); c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL); diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index e8f696cb58c0..826b14cae4e5 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -660,9 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c) unsigned int bytes = 0; struct bkey *k; struct btree *b; - struct btree_iter iter; - - min_heap_init(&iter.heap, NULL, MAX_BSETS); + struct btree_iter_stack iter; goto lock_root; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index 539454d8e2d0..f61ab1bada6c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/sched/clock.h> #include <linux/llist.h> -#include <linux/min_heap.h> #include <linux/ratelimit.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -31,10 +30,16 @@ struct closure; #endif +#define DECLARE_HEAP(type, name) \ + struct { \ + size_t size, used; \ + type *data; \ + } name + #define init_heap(heap, _size, gfp) \ ({ \ size_t _bytes; \ - (heap)->nr = 0; \ + (heap)->used = 0; \ (heap)->size = (_size); \ _bytes = (heap)->size * sizeof(*(heap)->data); \ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \ @@ -47,6 +52,64 @@ 
do { \ (heap)->data = NULL; \ } while (0) +#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j]) + +#define heap_sift(h, i, cmp) \ +do { \ + size_t _r, _j = i; \ + \ + for (; _j * 2 + 1 < (h)->used; _j = _r) { \ + _r = _j * 2 + 1; \ + if (_r + 1 < (h)->used && \ + cmp((h)->data[_r], (h)->data[_r + 1])) \ + _r++; \ + \ + if (cmp((h)->data[_r], (h)->data[_j])) \ + break; \ + heap_swap(h, _r, _j); \ + } \ +} while (0) + +#define heap_sift_down(h, i, cmp) \ +do { \ + while (i) { \ + size_t p = (i - 1) / 2; \ + if (cmp((h)->data[i], (h)->data[p])) \ + break; \ + heap_swap(h, i, p); \ + i = p; \ + } \ +} while (0) + +#define heap_add(h, d, cmp) \ +({ \ + bool _r = !heap_full(h); \ + if (_r) { \ + size_t _i = (h)->used++; \ + (h)->data[_i] = d; \ + \ + heap_sift_down(h, _i, cmp); \ + heap_sift(h, _i, cmp); \ + } \ + _r; \ +}) + +#define heap_pop(h, d, cmp) \ +({ \ + bool _r = (h)->used; \ + if (_r) { \ + (d) = (h)->data[0]; \ + (h)->used--; \ + heap_swap(h, 0, (h)->used); \ + heap_sift(h, 0, cmp); \ + } \ + _r; \ +}) + +#define heap_peek(h) ((h)->used ? 
(h)->data[0] : NULL) + +#define heap_full(h) ((h)->used == (h)->size) + #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 453efbbdc8ee..302e75f1fc4b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -908,16 +908,15 @@ static int bch_dirty_init_thread(void *arg) struct dirty_init_thrd_info *info = arg; struct bch_dirty_init_state *state = info->state; struct cache_set *c = state->c; - struct btree_iter iter; + struct btree_iter_stack iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; k = p = NULL; prev_idx = 0; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - bch_btree_iter_init(&c->root->keys, &iter, NULL); - k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad); + bch_btree_iter_stack_init(&c->root->keys, &iter, NULL); + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); BUG_ON(!k); p = k; @@ -931,7 +930,7 @@ static int bch_dirty_init_thread(void *arg) skip_nr = cur_idx - prev_idx; while (skip_nr) { - k = bch_btree_iter_next_filter(&iter, + k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad); if (k) @@ -980,13 +979,11 @@ void bch_sectors_dirty_init(struct bcache_device *d) int i; struct btree *b = NULL; struct bkey *k = NULL; - struct btree_iter iter; + struct btree_iter_stack iter; struct sectors_dirty_init op; struct cache_set *c = d->c; struct bch_dirty_init_state state; - min_heap_init(&iter.heap, NULL, MAX_BSETS); - retry_lock: b = c->root; rw_lock(0, b, b->level); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9dfdb63220d7..17157c4216a5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -517,7 +517,10 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; SHASH_DESC_ON_STACK(desc, lmk->hash_tfm); - struct md5_state md5state; + union { + struct md5_state 
md5state; + u8 state[CRYPTO_MD5_STATESIZE]; + } u; __le32 buf[4]; int i, r; @@ -548,13 +551,13 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, return r; /* No MD5 padding here */ - r = crypto_shash_export(desc, &md5state); + r = crypto_shash_export(desc, &u.md5state); if (r) return r; for (i = 0; i < MD5_HASH_WORDS; i++) - __cpu_to_le32s(&md5state.hash[i]); - memcpy(iv, &md5state.hash, cc->iv_size); + __cpu_to_le32s(&u.md5state.hash[i]); + memcpy(iv, &u.md5state.hash, cc->iv_size); return 0; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index d296770478b2..e8c0a8c6fb51 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2407,7 +2407,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) */ sb_retrieve_failed_devices(sb, failed_devices); rdev_for_each(r, mddev) { - if (test_bit(Journal, &rdev->flags) || + if (test_bit(Journal, &r->flags) || !r->sb_page) continue; sb2 = page_address(r->sb_page); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index bd694910b01b..7f524a26cebc 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2366,8 +2366,7 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - if (!bitmap->mddev->bitmap_info.external && - !bitmap->storage.sb_page) + if (!bitmap->storage.sb_page) return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 19c5a0ce5a40..64b8176907a9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1399,7 +1399,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, } read_bio = bio_alloc_clone(mirror->rdev->bdev, bio, gfp, &mddev->bio_set); - + read_bio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[rdisk] = read_bio; read_bio->bi_iter.bi_sector = r1_bio->sector + @@ -1649,6 +1649,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, 
wait_for_serialization(rdev, r1_bio); } + mbio->bi_opf &= ~REQ_NOWAIT; r1_bio->bios[i] = mbio; mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); @@ -3428,6 +3429,7 @@ static int raid1_reshape(struct mddev *mddev) /* ok, everything is stopped */ oldpool = conf->r1bio_pool; conf->r1bio_pool = newpool; + init_waitqueue_head(&conf->r1bio_pool.wait); for (d = d2 = 0; d < conf->raid_disks; d++) { struct md_rdev *rdev = conf->mirrors[d].rdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b74780af4c22..c9bd2005bfd0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1182,8 +1182,11 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, } } - if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) + if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) { + raid_end_bio_io(r10_bio); return; + } + rdev = read_balance(conf, r10_bio, &max_sectors); if (!rdev) { if (err_rdev) { @@ -1221,6 +1224,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, r10_bio->master_bio = bio; } read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set); + read_bio->bi_opf &= ~REQ_NOWAIT; r10_bio->devs[slot].bio = read_bio; r10_bio->devs[slot].rdev = rdev; @@ -1256,6 +1260,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, conf->mirrors[devnum].rdev; mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set); + mbio->bi_opf &= ~REQ_NOWAIT; if (replacement) r10_bio->devs[n_copy].repl_bio = mbio; else @@ -1370,8 +1375,11 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, } sectors = r10_bio->sectors; - if (!regular_request_wait(mddev, conf, bio, sectors)) + if (!regular_request_wait(mddev, conf, bio, sectors)) { + raid_end_bio_io(r10_bio); return; + } + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && (mddev->reshape_backwards ? 
(bio->bi_iter.bi_sector < conf->reshape_safe && diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 488e346047c1..77230fbe07be 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -573,7 +573,6 @@ static int device_irq_init(struct pm860x_chip *chip, unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int data, mask, ret = -EINVAL; int nr_irqs, irq_base = -1; - struct device_node *node = i2c->dev.of_node; mask = PM8607_B0_MISC1_INV_INT | PM8607_B0_MISC1_INT_CLEAR | PM8607_B0_MISC1_INT_MASK; @@ -624,7 +623,7 @@ static int device_irq_init(struct pm860x_chip *chip, ret = -EBUSY; goto out; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, chip->irq_base, 0, + irq_domain_create_legacy(dev_fwnode(&i2c->dev), nr_irqs, chip->irq_base, 0, &pm860x_irq_domain_ops, chip); chip->core_irq = i2c->irq; if (!chip->core_irq) diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 78b16c67a5fc..25377dcce60e 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -656,7 +656,6 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, { unsigned long flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; int ret; - struct device_node *node = chip->dev->of_node; /* clear all interrupts */ max8925_reg_read(chip->i2c, MAX8925_CHG_IRQ1); @@ -682,8 +681,9 @@ static int max8925_irq_init(struct max8925_chip *chip, int irq, return -EBUSY; } - irq_domain_create_legacy(of_fwnode_handle(node), MAX8925_NR_IRQS, chip->irq_base, 0, - &max8925_irq_domain_ops, chip); + irq_domain_create_legacy(dev_fwnode(chip->dev), MAX8925_NR_IRQS, + chip->irq_base, 0, &max8925_irq_domain_ops, + chip); /* request irq handler for pmic main irq*/ chip->core_irq = irq; diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c index 232c2bfe8c18..d3ab40651307 100644 --- a/drivers/mfd/twl4030-irq.c +++ b/drivers/mfd/twl4030-irq.c @@ -676,7 +676,6 @@ int twl4030_init_irq(struct device *dev, int irq_num) static 
struct irq_chip twl4030_irq_chip; int status, i; int irq_base, irq_end, nr_irqs; - struct device_node *node = dev->of_node; /* * TWL core and pwr interrupts must be contiguous because @@ -691,7 +690,7 @@ int twl4030_init_irq(struct device *dev, int irq_num) return irq_base; } - irq_domain_create_legacy(of_fwnode_handle(node), nr_irqs, irq_base, 0, + irq_domain_create_legacy(dev_fwnode(dev), nr_irqs, irq_base, 0, &irq_domain_simple_ops, NULL); irq_end = irq_base + TWL4030_CORE_NR_IRQS; diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 7f893bafaa60..c417ed34c057 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -44,6 +44,12 @@ static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, MMC_QUIRK_NO_UHS_DDR50_TUNING, EXT_CSD_REV_ANY), + /* + * Some SD cards reports discard support while they don't + */ + MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_DISCARD), + END_FIXUP }; @@ -147,12 +153,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), - /* - * Some SD cards reports discard support while they don't - */ - MMC_FIXUP(CID_NAME_ANY, CID_MANFID_SANDISK_SD, 0x5344, add_quirk_sd, - MMC_QUIRK_BROKEN_SD_DISCARD), - END_FIXUP }; diff --git a/drivers/mmc/core/sd_uhs2.c b/drivers/mmc/core/sd_uhs2.c index 1c31d0dfa961..de17d1611290 100644 --- a/drivers/mmc/core/sd_uhs2.c +++ b/drivers/mmc/core/sd_uhs2.c @@ -91,8 +91,8 @@ static int sd_uhs2_phy_init(struct mmc_host *host) err = host->ops->uhs2_control(host, UHS2_PHY_INIT); if (err) { - pr_err("%s: failed to initial phy for UHS-II!\n", - mmc_hostname(host)); + pr_debug("%s: failed to initial phy for UHS-II!\n", + mmc_hostname(host)); } return err; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 31eb90536bce..d7020e06dd55 100644 --- 
a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -846,12 +846,18 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma, static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data) { if (!(data->host_cookie & MSDC_PREPARE_FLAG)) { - data->host_cookie |= MSDC_PREPARE_FLAG; data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len, mmc_get_dma_dir(data)); + if (data->sg_count) + data->host_cookie |= MSDC_PREPARE_FLAG; } } +static bool msdc_data_prepared(struct mmc_data *data) +{ + return data->host_cookie & MSDC_PREPARE_FLAG; +} + static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data) { if (data->host_cookie & MSDC_ASYNC_FLAG) @@ -1483,8 +1489,19 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq) WARN_ON(!host->hsq_en && host->mrq); host->mrq = mrq; - if (mrq->data) + if (mrq->data) { msdc_prepare_data(host, mrq->data); + if (!msdc_data_prepared(mrq->data)) { + host->mrq = NULL; + /* + * Failed to prepare DMA area, fail fast before + * starting any commands. + */ + mrq->cmd->error = -ENOSPC; + mmc_request_done(mmc_from_priv(host), mrq); + return; + } + } /* if SBC is required, we have HW option and SW option. 
* if HW option is enabled, and SBC does not have "special" flags, diff --git a/drivers/mmc/host/sdhci-of-k1.c b/drivers/mmc/host/sdhci-of-k1.c index 6880d3e9ab62..2e5da7c5834c 100644 --- a/drivers/mmc/host/sdhci-of-k1.c +++ b/drivers/mmc/host/sdhci-of-k1.c @@ -276,7 +276,8 @@ static int spacemit_sdhci_probe(struct platform_device *pdev) host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY; - if (spacemit_sdhci_get_clocks(dev, pltfm_host)) + ret = spacemit_sdhci_get_clocks(dev, pltfm_host); + if (ret) goto err_pltfm; ret = sdhci_add_host(host); diff --git a/drivers/mmc/host/sdhci-uhs2.c b/drivers/mmc/host/sdhci-uhs2.c index c53b64d50c0d..0efeb9d0c376 100644 --- a/drivers/mmc/host/sdhci-uhs2.c +++ b/drivers/mmc/host/sdhci-uhs2.c @@ -99,8 +99,8 @@ void sdhci_uhs2_reset(struct sdhci_host *host, u16 mask) /* hw clears the bit when it's done */ if (read_poll_timeout_atomic(sdhci_readw, val, !(val & mask), 10, UHS2_RESET_TIMEOUT_100MS, true, host, SDHCI_UHS2_SW_RESET)) { - pr_warn("%s: %s: Reset 0x%x never completed. %s: clean reset bit.\n", __func__, - mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); + pr_debug("%s: %s: Reset 0x%x never completed. 
%s: clean reset bit.\n", __func__, + mmc_hostname(host->mmc), (int)mask, mmc_hostname(host->mmc)); sdhci_writeb(host, 0, SDHCI_UHS2_SW_RESET); return; } @@ -335,8 +335,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IF_DETECT), 100, UHS2_INTERFACE_DETECT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: not detect UHS2 interface in 100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 interface detect timeout in 100ms"); return -EIO; } @@ -345,8 +345,8 @@ static int sdhci_uhs2_interface_detect(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_LANE_SYNC), 100, UHS2_LANE_SYNC_TIMEOUT_150MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 Lane sync fail in 150ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 Lane sync fail in 150ms"); return -EIO; } @@ -417,12 +417,12 @@ static int sdhci_uhs2_do_detect_init(struct mmc_host *mmc) host->ops->uhs2_pre_detect_init(host); if (sdhci_uhs2_interface_detect(host)) { - pr_warn("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); + pr_debug("%s: cannot detect UHS2 interface.\n", mmc_hostname(host->mmc)); return -EIO; } if (sdhci_uhs2_init(host)) { - pr_warn("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); + pr_debug("%s: UHS2 init fail.\n", mmc_hostname(host->mmc)); return -EIO; } @@ -504,8 +504,8 @@ static int sdhci_uhs2_check_dormant(struct sdhci_host *host) if (read_poll_timeout(sdhci_readl, val, (val & SDHCI_UHS2_IN_DORMANT_STATE), 100, UHS2_CHECK_DORMANT_TIMEOUT_100MS, true, host, SDHCI_PRESENT_STATE)) { - pr_warn("%s: UHS2 IN_DORMANT fail in 100ms.\n", mmc_hostname(host->mmc)); - sdhci_dumpregs(host); + pr_debug("%s: UHS2 IN_DORMANT fail in 
100ms.\n", mmc_hostname(host->mmc)); + sdhci_dbg_dumpregs(host, "UHS2 IN_DORMANT fail in 100ms"); return -EIO; } return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f008167d1863..e116f2db34d5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2065,15 +2065,10 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock) host->mmc->actual_clock = 0; - clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); - if (clk & SDHCI_CLOCK_CARD_EN) - sdhci_writew(host, clk & ~SDHCI_CLOCK_CARD_EN, - SDHCI_CLOCK_CONTROL); + sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); - if (clock == 0) { - sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); + if (clock == 0) return; - } clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock); sdhci_enable_clk(host, clk); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index f9d65dd0f2b2..70ada1857a4c 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -900,4 +900,20 @@ void sdhci_switch_external_dma(struct sdhci_host *host, bool en); void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable); void __sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd); +#if defined(CONFIG_DYNAMIC_DEBUG) || \ + (defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE)) +#define SDHCI_DBG_ANYWAY 0 +#elif defined(DEBUG) +#define SDHCI_DBG_ANYWAY 1 +#else +#define SDHCI_DBG_ANYWAY 0 +#endif + +#define sdhci_dbg_dumpregs(host, fmt) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor) || SDHCI_DBG_ANYWAY) \ + sdhci_dumpregs(host); \ +} while (0) + #endif /* __SDHCI_HW_H */ diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 391d81ad960c..8dc4f5c493fc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -559,7 +559,7 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, /* Sanitize user input */ p.devname[BLKPG_DEVNAMELTH - 1] = '\0'; - return mtd_add_partition(mtd, p.devname, 
p.start, p.length, NULL); + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 429d8c16baf0..5ba9a741f5ac 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -68,13 +68,7 @@ static struct class mtd_class = { .pm = MTD_CLS_PM_OPS, }; -static struct class mtd_master_class = { - .name = "mtd_master", - .pm = MTD_CLS_PM_OPS, -}; - static DEFINE_IDR(mtd_idr); -static DEFINE_IDR(mtd_master_idr); /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ @@ -89,9 +83,8 @@ EXPORT_SYMBOL_GPL(__mtd_next_device); static LIST_HEAD(mtd_notifiers); -#define MTD_MASTER_DEVS 255 + #define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) -static dev_t mtd_master_devt; /* REVISIT once MTD uses the driver model better, whoever allocates * the mtd_info will probably want to use the release() hook... @@ -111,17 +104,6 @@ static void mtd_release(struct device *dev) device_destroy(&mtd_class, index + 1); } -static void mtd_master_release(struct device *dev) -{ - struct mtd_info *mtd = dev_get_drvdata(dev); - - idr_remove(&mtd_master_idr, mtd->index); - of_node_put(mtd_get_of_node(mtd)); - - if (mtd_is_partition(mtd)) - release_mtd_partition(mtd); -} - static void mtd_device_release(struct kref *kref) { struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt); @@ -385,11 +367,6 @@ static const struct device_type mtd_devtype = { .release = mtd_release, }; -static const struct device_type mtd_master_devtype = { - .name = "mtd_master", - .release = mtd_master_release, -}; - static bool mtd_expert_analysis_mode; #ifdef CONFIG_DEBUG_FS @@ -657,13 +634,13 @@ exit_parent: /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure - * @partitioned: create partitioned device * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of 
its arrival. Returns * zero on success or non-zero on failure. */ -int add_mtd_device(struct mtd_info *mtd, bool partitioned) + +int add_mtd_device(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); struct mtd_info *master = mtd_get_master(mtd); @@ -710,17 +687,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) ofidx = -1; if (np) ofidx = of_alias_get_id(np, "mtd"); - if (partitioned) { - if (ofidx >= 0) - i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); - } else { - if (ofidx >= 0) - i = idr_alloc(&mtd_master_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_master_idr, mtd, 0, 0, GFP_KERNEL); - } + if (ofidx >= 0) + i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); + else + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; @@ -768,18 +738,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) /* Caller should have set dev.parent to match the * physical device, if appropriate. 
*/ - if (partitioned) { - mtd->dev.type = &mtd_devtype; - mtd->dev.class = &mtd_class; - mtd->dev.devt = MTD_DEVT(i); - dev_set_name(&mtd->dev, "mtd%d", i); - error = dev_set_name(&mtd->dev, "mtd%d", i); - } else { - mtd->dev.type = &mtd_master_devtype; - mtd->dev.class = &mtd_master_class; - mtd->dev.devt = MKDEV(MAJOR(mtd_master_devt), i); - error = dev_set_name(&mtd->dev, "mtd_master%d", i); - } + mtd->dev.type = &mtd_devtype; + mtd->dev.class = &mtd_class; + mtd->dev.devt = MTD_DEVT(i); + error = dev_set_name(&mtd->dev, "mtd%d", i); if (error) goto fail_devname; dev_set_drvdata(&mtd->dev, mtd); @@ -787,7 +749,6 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) { - pr_err("mtd: %s device_register fail %d\n", mtd->name, error); put_device(&mtd->dev); goto fail_added; } @@ -799,13 +760,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mtd_debugfs_populate(mtd); - if (partitioned) { - device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, - "mtd%dro", i); - } + device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, + "mtd%dro", i); - pr_debug("mtd: Giving out %spartitioned device %d to %s\n", - partitioned ? 
"" : "un-", i, mtd->name); + pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) @@ -813,16 +771,13 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mutex_unlock(&mtd_table_mutex); - if (partitioned) { - if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { - if (IS_BUILTIN(CONFIG_MTD)) { - pr_info("mtd: setting mtd%d (%s) as root device\n", - mtd->index, mtd->name); - ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); - } else { - pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", - mtd->index, mtd->name); - } + if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { + if (IS_BUILTIN(CONFIG_MTD)) { + pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); + ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); + } else { + pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", + mtd->index, mtd->name); } } @@ -838,10 +793,7 @@ fail_nvmem_add: fail_added: of_node_put(mtd_get_of_node(mtd)); fail_devname: - if (partitioned) - idr_remove(&mtd_idr, i); - else - idr_remove(&mtd_master_idr, i); + idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; @@ -859,14 +811,12 @@ fail_locked: int del_mtd_device(struct mtd_info *mtd) { - struct mtd_notifier *not; - struct idr *idr; int ret; + struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); - idr = mtd->dev.class == &mtd_class ? 
&mtd_idr : &mtd_master_idr; - if (idr_find(idr, mtd->index) != mtd) { + if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } @@ -1106,7 +1056,6 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, const struct mtd_partition *parts, int nr_parts) { - struct mtd_info *parent; int ret, err; mtd_set_dev_defaults(mtd); @@ -1115,30 +1064,25 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, if (ret) goto out; - ret = add_mtd_device(mtd, false); - if (ret) - goto out; - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) { - ret = mtd_add_partition(mtd, mtd->name, 0, MTDPART_SIZ_FULL, &parent); + ret = add_mtd_device(mtd); if (ret) goto out; - - } else { - parent = mtd; } /* Prefer parsed partitions over driver-provided fallback */ - ret = parse_mtd_partitions(parent, types, parser_data); + ret = parse_mtd_partitions(mtd, types, parser_data); if (ret == -EPROBE_DEFER) goto out; if (ret > 0) ret = 0; else if (nr_parts) - ret = add_mtd_partitions(parent, parts, nr_parts); - else if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) - ret = mtd_add_partition(parent, mtd->name, 0, MTDPART_SIZ_FULL, NULL); + ret = add_mtd_partitions(mtd, parts, nr_parts); + else if (!device_is_registered(&mtd->dev)) + ret = add_mtd_device(mtd); + else + ret = 0; if (ret) goto out; @@ -1158,14 +1102,13 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, register_reboot_notifier(&mtd->reboot_notifier); } - return 0; out: - nvmem_unregister(mtd->otp_user_nvmem); - nvmem_unregister(mtd->otp_factory_nvmem); - - del_mtd_partitions(mtd); + if (ret) { + nvmem_unregister(mtd->otp_user_nvmem); + nvmem_unregister(mtd->otp_factory_nvmem); + } - if (device_is_registered(&mtd->dev)) { + if (ret && device_is_registered(&mtd->dev)) { err = del_mtd_device(mtd); if (err) pr_err("Error when deleting MTD device (%d)\n", err); @@ -1324,7 +1267,8 @@ int __get_mtd_device(struct mtd_info *mtd) mtd = mtd->parent; } 
- kref_get(&master->refcnt); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_get(&master->refcnt); return 0; } @@ -1418,7 +1362,8 @@ void __put_mtd_device(struct mtd_info *mtd) mtd = parent; } - kref_put(&master->refcnt, mtd_device_release); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_put(&master->refcnt, mtd_device_release); module_put(master->owner); @@ -2585,16 +2530,6 @@ static int __init init_mtd(void) if (ret) goto err_reg; - ret = class_register(&mtd_master_class); - if (ret) - goto err_reg2; - - ret = alloc_chrdev_region(&mtd_master_devt, 0, MTD_MASTER_DEVS, "mtd_master"); - if (ret < 0) { - pr_err("unable to allocate char dev region\n"); - goto err_chrdev; - } - mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); @@ -2619,10 +2554,6 @@ out_procfs: bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); err_bdi: - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); -err_chrdev: - class_unregister(&mtd_master_class); -err_reg2: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); @@ -2636,12 +2567,9 @@ static void __exit cleanup_mtd(void) if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); - class_unregister(&mtd_master_class); - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); - idr_destroy(&mtd_master_idr); } module_init(init_mtd); diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 2258d31c5aa6..b014861a06a6 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -8,7 +8,7 @@ extern struct mutex mtd_table_mutex; extern struct backing_dev_info *mtd_bdi; struct mtd_info *__mtd_next_device(int i); -int __must_check add_mtd_device(struct mtd_info *mtd, bool partitioned); +int __must_check add_mtd_device(struct mtd_info *mtd); int del_mtd_device(struct mtd_info *mtd); int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int 
del_mtd_partitions(struct mtd_info *); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 5a3db36d734e..994e8c51e674 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -86,7 +86,8 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, * parent conditional on that option. Note, this is a way to * distinguish between the parent and its partitions in sysfs. */ - child->dev.parent = &parent->dev; + child->dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ? + &parent->dev : parent->dev.parent; child->dev.of_node = part->of_node; child->parent = parent; child->part.offset = part->offset; @@ -242,7 +243,7 @@ static int mtd_add_partition_attrs(struct mtd_info *new) } int mtd_add_partition(struct mtd_info *parent, const char *name, - long long offset, long long length, struct mtd_info **out) + long long offset, long long length) { struct mtd_info *master = mtd_get_master(parent); u64 parent_size = mtd_is_partition(parent) ? 
@@ -275,15 +276,12 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) goto err_remove_part; mtd_add_partition_attrs(child); - if (out) - *out = child; - return 0; err_remove_part: @@ -415,7 +413,7 @@ int add_mtd_partitions(struct mtd_info *parent, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) { mutex_lock(&master->master.partitions_lock); list_del(&child->part.node); @@ -592,6 +590,9 @@ static int mtd_part_of_parse(struct mtd_info *master, int ret, err = 0; dev = &master->dev; + /* Use parent device (controller) if the top level MTD is not registered */ + if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master)) + dev = master->dev.parent; np = mtd_get_of_node(master); if (mtd_is_partition(master)) @@ -710,7 +711,6 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types, if (ret < 0 && !err) err = ret; } - return err; } diff --git a/drivers/mtd/nand/qpic_common.c b/drivers/mtd/nand/qpic_common.c index 4dc4d65e7d32..8e604cc22ca3 100644 --- a/drivers/mtd/nand/qpic_common.c +++ b/drivers/mtd/nand/qpic_common.c @@ -57,14 +57,15 @@ qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc) bam_txn_buf += sizeof(*bam_txn); bam_txn->bam_ce = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->bam_ce) * QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn->bam_ce_nitems = QPIC_PER_CW_CMD_ELEMENTS * num_cw; + bam_txn_buf += sizeof(*bam_txn->bam_ce) * bam_txn->bam_ce_nitems; bam_txn->cmd_sgl = bam_txn_buf; - bam_txn_buf += - sizeof(*bam_txn->cmd_sgl) * QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn->cmd_sgl_nitems = QPIC_PER_CW_CMD_SGL * num_cw; + bam_txn_buf += sizeof(*bam_txn->cmd_sgl) * bam_txn->cmd_sgl_nitems; 
bam_txn->data_sgl = bam_txn_buf; + bam_txn->data_sgl_nitems = QPIC_PER_CW_DATA_SGL * num_cw; init_completion(&bam_txn->txn_done); @@ -238,6 +239,11 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; u32 offset; + if (bam_txn->bam_ce_pos + size > bam_txn->bam_ce_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "CE"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_pos]; /* fill the command desc */ @@ -258,6 +264,12 @@ int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, /* use the separate sgl after this command */ if (flags & NAND_BAM_NEXT_SGL) { + if (bam_txn->cmd_sgl_pos >= bam_txn->cmd_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", + "CMD sgl"); + return -EINVAL; + } + bam_ce_buffer = &bam_txn->bam_ce[bam_txn->bam_ce_start]; bam_ce_size = (bam_txn->bam_ce_pos - bam_txn->bam_ce_start) * @@ -297,10 +309,20 @@ int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, struct bam_transaction *bam_txn = nandc->bam_txn; if (read) { + if (bam_txn->rx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "RX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->rx_sgl_pos], vaddr, size); bam_txn->rx_sgl_pos++; } else { + if (bam_txn->tx_sgl_pos >= bam_txn->data_sgl_nitems) { + dev_err(nandc->dev, "BAM %s array is full\n", "TX sgl"); + return -EINVAL; + } + sg_set_buf(&bam_txn->data_sgl[bam_txn->tx_sgl_pos], vaddr, size); bam_txn->tx_sgl_pos++; diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 7099db7a62be..c411fe9be3ef 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1585,6 +1585,7 @@ static void spinand_cleanup(struct spinand_device *spinand) { struct nand_device *nand = spinand_to_nand(spinand); + nanddev_ecc_engine_cleanup(nand); nanddev_cleanup(nand); spinand_manufacturer_cleanup(spinand); 
kfree(spinand->databuf); diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 19f8dd4a6370..b7a28f001a38 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -25,7 +25,7 @@ static SPINAND_OP_VARIANTS(read_cache_octal_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(0, 2, NULL, 0, 105 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 86 * HZ_PER_MHZ), + SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 162 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(0, 1, NULL, 0, 133 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0)); @@ -42,11 +42,11 @@ static SPINAND_OP_VARIANTS(update_cache_octal_variants, static SPINAND_OP_VARIANTS(read_cache_dual_quad_dtr_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(0, 8, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(0, 4, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), @@ -289,7 +289,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N02JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x22), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 2, 1, 1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 2, 1), 
NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, @@ -298,7 +298,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N04JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x23), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 4, 1, 1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 4, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 6c656bfdb323..fe74dbd2c966 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -665,7 +665,7 @@ static int m_can_handle_lost_msg(struct net_device *dev) struct can_frame *frame; u32 timestamp = 0; - netdev_err(dev, "msg lost in rxf0\n"); + netdev_dbg(dev, "msg lost in rxf0\n"); stats->rx_errors++; stats->rx_over_errors++; diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index e5c162f8c589..8edaa339d590 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -411,10 +411,11 @@ static int tcan4x5x_can_probe(struct spi_device *spi) priv = cdev_to_priv(mcan_class); priv->power = devm_regulator_get_optional(&spi->dev, "vsup"); - if (PTR_ERR(priv->power) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto out_m_can_class_free_dev; - } else { + if (IS_ERR(priv->power)) { + if (PTR_ERR(priv->power) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_m_can_class_free_dev; + } priv->power = NULL; } diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index a7ec609d64de..9057180051df 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1065,23 +1065,18 @@ static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) static int airoha_qdma_init_hfwd_queues(struct 
airoha_qdma *qdma) { + int size, index, num_desc = HW_DSCP_NUM; struct airoha_eth *eth = qdma->eth; int id = qdma - ð->qdma[0]; + u32 status, buf_size; dma_addr_t dma_addr; const char *name; - int size, index; - u32 status; - - size = HW_DSCP_NUM * sizeof(struct airoha_qdma_fwd_desc); - if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) - return -ENOMEM; - - airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); name = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d-buf", id); if (!name) return -ENOMEM; + buf_size = id ? AIROHA_MAX_PACKET_SIZE / 2 : AIROHA_MAX_PACKET_SIZE; index = of_property_match_string(eth->dev->of_node, "memory-region-names", name); if (index >= 0) { @@ -1099,8 +1094,12 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) rmem = of_reserved_mem_lookup(np); of_node_put(np); dma_addr = rmem->base; + /* Compute the number of hw descriptors according to the + * reserved memory size and the payload buffer size + */ + num_desc = div_u64(rmem->size, buf_size); } else { - size = AIROHA_MAX_PACKET_SIZE * HW_DSCP_NUM; + size = buf_size * num_desc; if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) return -ENOMEM; @@ -1108,15 +1107,21 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) airoha_qdma_wr(qdma, REG_FWD_BUF_BASE, dma_addr); + size = num_desc * sizeof(struct airoha_qdma_fwd_desc); + if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) + return -ENOMEM; + + airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); + /* QDMA0: 2KB. 
QDMA1: 1KB */ airoha_qdma_rmw(qdma, REG_HW_FWD_DSCP_CFG, HW_FWD_DSCP_PAYLOAD_SIZE_MASK, - FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0)); + FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, !!id)); airoha_qdma_rmw(qdma, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK, FIELD_PREP(FWD_DSCP_LOW_THR_MASK, 128)); airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG, LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, - FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | + FIELD_PREP(HW_FWD_DESC_NUM_MASK, num_desc) | LMGR_INIT_START | LMGR_SRAM_MODE_MASK); return read_poll_timeout(airoha_qdma_rr, status, @@ -2979,6 +2984,7 @@ static int airoha_probe(struct platform_device *pdev) error_napi_stop: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_stop_napi(ð->qdma[i]); + airoha_ppe_deinit(eth); error_hw_cleanup: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_hw_cleanup(ð->qdma[i]); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 9067d2fc7706..0e217acfc5ef 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -809,8 +809,10 @@ airoha_ppe_foe_flow_l2_entry_update(struct airoha_ppe *ppe, int idle; hwe = airoha_ppe_foe_get_entry(ppe, iter->hash); - ib1 = READ_ONCE(hwe->ib1); + if (!hwe) + continue; + ib1 = READ_ONCE(hwe->ib1); state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, ib1); if (state != AIROHA_FOE_STATE_BIND) { iter->hash = 0xffff; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index e1296cbf4ff3..9316de4126cf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1269,6 +1269,8 @@ #define MDIO_VEND2_CTRL1_SS13 BIT(13) #endif +#define XGBE_VEND2_MAC_AUTO_SW BIT(9) + /* MDIO mask values */ #define XGBE_AN_CL73_INT_CMPLT BIT(0) #define XGBE_AN_CL73_INC_LINK BIT(1) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 
71449edbb76d..1a37ec45e650 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -266,6 +266,10 @@ static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable, reg |= MDIO_VEND2_CTRL1_AN_RESTART; XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg); + + reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL); + reg |= XGBE_VEND2_MAC_AUTO_SW; + XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_PCS_DIG_CTRL, reg); } static void xgbe_an37_restart(struct xgbe_prv_data *pdata) @@ -894,6 +898,11 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata) netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n", (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII"); + + reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1); + reg &= ~MDIO_AN_CTRL1_ENABLE; + XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg); + } static void xgbe_an73_init(struct xgbe_prv_data *pdata) @@ -1295,6 +1304,10 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata, &an_restart); + /* bail out if the link status register read fails */ + if (pdata->phy.link < 0) + return; + if (an_restart) { xgbe_phy_config_aneg(pdata); goto adjust_link; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 7a4dfa4e19c7..23c39e92e783 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2746,8 +2746,7 @@ static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed) static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int reg; - int ret; + int reg, ret; *an_restart = 0; @@ -2781,11 +2780,20 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) return 0; } - /* Link status is latched low, so read once to clear - * and then read again to get current state - */ - 
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + + /* Link status is latched low so that momentary link drops + * can be detected. If link was already down read again + * to get the latest state. + */ + + if (!pdata->phy.link && !(reg & MDIO_STAT1_LSTATUS)) { + reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); + if (reg < 0) + return reg; + } if (pdata->en_rx_adap) { /* if the link is available and adaptation is done, @@ -2804,9 +2812,7 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) xgbe_phy_set_mode(pdata, phy_data->cur_mode); } - /* check again for the link and adaptation status */ - reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1); - if ((reg & MDIO_STAT1_LSTATUS) && pdata->rx_adapt_done) + if (pdata->rx_adapt_done) return 1; } else if (reg & MDIO_STAT1_LSTATUS) return 1; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 6359bb87dc13..057379cd43ba 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -183,12 +183,12 @@ #define XGBE_LINK_TIMEOUT 5 #define XGBE_KR_TRAINING_WAIT_ITER 50 -#define XGBE_SGMII_AN_LINK_STATUS BIT(1) +#define XGBE_SGMII_AN_LINK_DUPLEX BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) #define XGBE_SGMII_AN_LINK_SPEED_10 0x00 #define XGBE_SGMII_AN_LINK_SPEED_100 0x04 #define XGBE_SGMII_AN_LINK_SPEED_1000 0x08 -#define XGBE_SGMII_AN_LINK_DUPLEX BIT(4) +#define XGBE_SGMII_AN_LINK_STATUS BIT(4) /* ECC correctable error notification window (seconds) */ #define XGBE_ECC_LIMIT 60 diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index cfdb546a09e7..98a4d089270e 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -1861,14 +1861,21 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) break; } - buffer_info->alloced = 1; - 
buffer_info->skb = skb; - buffer_info->length = (u16) adapter->rx_buffer_len; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); buffer_info->dma = dma_map_page(&pdev->dev, page, offset, adapter->rx_buffer_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + kfree_skb(skb); + adapter->soft_stats.rx_dropped++; + break; + } + + buffer_info->alloced = 1; + buffer_info->skb = skb; + buffer_info->length = (u16)adapter->rx_buffer_len; + rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); rfd_desc->buf_len = cpu_to_le16(adapter->rx_buffer_len); rfd_desc->coalese = 0; @@ -2183,8 +2190,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb, return 0; } -static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, - struct tx_packet_desc *ptpd) +static bool atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, + struct tx_packet_desc *ptpd) { struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; struct atl1_buffer *buffer_info; @@ -2194,6 +2201,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, unsigned int nr_frags; unsigned int f; int retval; + u16 first_mapped; u16 next_to_use; u16 data_len; u8 hdr_len; @@ -2201,6 +2209,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buf_len -= skb->data_len; nr_frags = skb_shinfo(skb)->nr_frags; next_to_use = atomic_read(&tpd_ring->next_to_use); + first_mapped = next_to_use; buffer_info = &tpd_ring->buffer_info[next_to_use]; BUG_ON(buffer_info->skb); /* put skb in last TPD */ @@ -2216,6 +2225,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, hdr_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2242,6 +2253,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff 
*skb, page, offset, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2254,6 +2268,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = dma_map_page(&adapter->pdev->dev, page, offset, buf_len, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; } @@ -2277,6 +2293,9 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev, frag, i * ATL1_MAX_TX_BUF_LEN, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + buffer_info->dma)) + goto dma_err; if (++next_to_use == tpd_ring->count) next_to_use = 0; @@ -2285,6 +2304,22 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, /* last tpd's buffer-info */ buffer_info->skb = skb; + + return true; + + dma_err: + while (first_mapped != next_to_use) { + buffer_info = &tpd_ring->buffer_info[first_mapped]; + dma_unmap_page(&adapter->pdev->dev, + buffer_info->dma, + buffer_info->length, + DMA_TO_DEVICE); + buffer_info->dma = 0; + + if (++first_mapped == tpd_ring->count) + first_mapped = 0; + } + return false; } static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, @@ -2355,10 +2390,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, len = skb_headlen(skb); - if (unlikely(skb->len <= 0)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (unlikely(skb->len <= 0)) + goto drop_packet; nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { @@ -2371,10 +2404,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, if (mss) { if (skb->protocol == htons(ETH_P_IP)) { proto_hdr_len = skb_tcp_all_headers(skb); - if (unlikely(proto_hdr_len > len)) { - dev_kfree_skb_any(skb); - return 
NETDEV_TX_OK; - } + if (unlikely(proto_hdr_len > len)) + goto drop_packet; + /* need additional TPD ? */ if (proto_hdr_len != len) count += (len - proto_hdr_len + @@ -2406,23 +2438,26 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, } tso = atl1_tso(adapter, skb, ptpd); - if (tso < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (tso < 0) + goto drop_packet; if (!tso) { ret_val = atl1_tx_csum(adapter, skb, ptpd); - if (ret_val < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } + if (ret_val < 0) + goto drop_packet; } - atl1_tx_map(adapter, skb, ptpd); + if (!atl1_tx_map(adapter, skb, ptpd)) + goto drop_packet; + atl1_tx_queue(adapter, count, ptpd); atl1_update_mailbox(adapter); return NETDEV_TX_OK; + +drop_packet: + adapter->soft_stats.tx_errors++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static int atl1_rings_clean(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 869580b6f70d..243cb13cb01c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2989,6 +2989,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, { struct bnxt_napi *bnapi = cpr->bnapi; u32 raw_cons = cpr->cp_raw_cons; + bool flush_xdp = false; u32 cons; int rx_pkts = 0; u8 event = 0; @@ -3042,6 +3043,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, else rc = bnxt_force_rx_discard(bp, cpr, &raw_cons, &event); + if (event & BNXT_REDIRECT_EVENT) + flush_xdp = true; if (likely(rc >= 0)) rx_pkts += rc; /* Increment rx_pkts when rc is -ENOMEM to count towards @@ -3066,7 +3069,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (event & BNXT_REDIRECT_EVENT) { + if (flush_xdp) { xdp_do_flush(); event &= ~BNXT_REDIRECT_EVENT; } @@ -10780,6 +10783,72 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, 
bp->num_rss_ctx--; } +static bool bnxt_vnic_has_rx_ring(struct bnxt *bp, struct bnxt_vnic_info *vnic, + int rxr_id) +{ + u16 tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + int i, vnic_rx; + + /* Ntuple VNIC always has all the rx rings. Any change of ring id + * must be updated because a future filter may use it. + */ + if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) + return true; + + for (i = 0; i < tbl_size; i++) { + if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + vnic_rx = ethtool_rxfh_context_indir(vnic->rss_ctx)[i]; + else + vnic_rx = bp->rss_indir_tbl[i]; + + if (rxr_id == vnic_rx) + return true; + } + + return false; +} + +static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u16 mru, int rxr_id) +{ + int rc; + + if (!bnxt_vnic_has_rx_ring(bp, vnic, rxr_id)) + return 0; + + if (mru) { + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + } + vnic->mru = mru; + bnxt_hwrm_vnic_update(bp, vnic, + VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + return 0; +} + +static int bnxt_set_rss_ctx_vnic_mru(struct bnxt *bp, u16 mru, int rxr_id) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + int rc; + + xa_for_each(&bp->dev->ethtool->rss_ctx, context, ctx) { + struct bnxt_rss_ctx *rss_ctx = ethtool_rxfh_context_priv(ctx); + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, rxr_id); + if (rc) + return rc; + } + + return 0; +} + static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) { bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); @@ -11538,11 +11607,9 @@ static void bnxt_free_irq(struct bnxt *bp) static int bnxt_request_irq(struct bnxt *bp) { + struct cpu_rmap *rmap = NULL; int i, j, rc = 0; unsigned long flags = 0; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif rc = bnxt_setup_int_mode(bp); if (rc) { @@ -11563,15 +11630,15 @@ static int bnxt_request_irq(struct bnxt *bp) int map_idx = 
bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; -#ifdef CONFIG_RFS_ACCEL - if (rmap && bp->bnapi[i]->rx_ring) { + if (IS_ENABLED(CONFIG_RFS_ACCEL) && + rmap && bp->bnapi[i]->rx_ring) { rc = irq_cpu_rmap_add(rmap, irq->vector); if (rc) netdev_warn(bp->dev, "failed adding irq rmap for ring %d\n", j); j++; } -#endif + rc = request_irq(irq->vector, irq->handler, flags, irq->name, bp->bnapi[i]); if (rc) @@ -15927,6 +15994,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) struct bnxt_vnic_info *vnic; struct bnxt_napi *bnapi; int i, rc; + u16 mru; rxr = &bp->rx_ring[idx]; clone = qmem; @@ -15977,21 +16045,15 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic->vnic_id, rc); + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, idx); + if (rc) return rc; - } - vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); } - - return 0; + return bnxt_set_rss_ctx_vnic_mru(bp, mru, idx); err_reset: netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", @@ -16013,10 +16075,10 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - vnic->mru = 0; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + bnxt_set_vnic_mru_p5(bp, vnic, 0, idx); } + bnxt_set_rss_ctx_vnic_mru(bp, 0, idx); /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); rxr = &bp->rx_ring[idx]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 
ce97befd3cb3..67e70d3d0980 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -368,23 +368,27 @@ static u32 bnxt_get_ctx_coredump(struct bnxt *bp, void *buf, u32 offset, if (!ctxm->mem_valid || !seg_id) continue; - if (trace) + if (trace) { extra_hlen = BNXT_SEG_RCD_LEN; + if (buf) { + u16 trace_type = bnxt_bstore_to_trace[type]; + + bnxt_fill_drv_seg_record(bp, &record, ctxm, + trace_type); + } + } + if (buf) data = buf + BNXT_SEG_HDR_LEN + extra_hlen; + seg_len = bnxt_copy_ctx_mem(bp, ctxm, data, 0) + extra_hlen; if (buf) { bnxt_fill_coredump_seg_hdr(bp, &seg_hdr, NULL, seg_len, 0, 0, 0, comp_id, seg_id); memcpy(buf, &seg_hdr, BNXT_SEG_HDR_LEN); buf += BNXT_SEG_HDR_LEN; - if (trace) { - u16 trace_type = bnxt_bstore_to_trace[type]; - - bnxt_fill_drv_seg_record(bp, &record, ctxm, - trace_type); + if (trace) memcpy(buf, &record, BNXT_SEG_RCD_LEN); - } buf += seg_len; } len += BNXT_SEG_HDR_LEN + seg_len; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 0dbb880a7aa0..71e14be2507e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -487,7 +487,9 @@ static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc) if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && i > bp->max_tc) return -EINVAL; + } + for (i = 0; i < max_tc; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 84c4812414fd..2450a369b792 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -231,10 +231,9 @@ void bnxt_ulp_stop(struct bnxt *bp) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + (edev->flags & 
BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_stop_exit; edev->flags |= BNXT_EN_FLAG_ULP_STOPPED; if (aux_priv) { @@ -250,6 +249,7 @@ void bnxt_ulp_stop(struct bnxt *bp) adrv->suspend(adev, pm); } } +ulp_stop_exit: mutex_unlock(&edev->en_dev_lock); } @@ -258,19 +258,13 @@ void bnxt_ulp_start(struct bnxt *bp, int err) struct bnxt_aux_priv *aux_priv = bp->aux_priv; struct bnxt_en_dev *edev = bp->edev; - if (!edev) - return; - - edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; - - if (err) + if (!edev || err) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_start_exit; if (edev->ulp_tbl->msix_requested) bnxt_fill_msix_vecs(bp, edev->msix_entries); @@ -287,6 +281,8 @@ void bnxt_ulp_start(struct bnxt *bp, int err) adrv->resume(adev); } } +ulp_start_exit: + edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; mutex_unlock(&edev->en_dev_lock); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 4a6d8cb9f970..09e7e8efa6fa 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -115,7 +115,7 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, tx_buf->action = XDP_REDIRECT; tx_buf->xdpf = xdpf; dma_unmap_addr_set(tx_buf, mapping, mapping); - dma_unmap_len_set(tx_buf, len, 0); + dma_unmap_len_set(tx_buf, len, len); } void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index fa0077bc67b7..97585c160de3 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -4092,6 +4092,12 @@ static int bcmgenet_probe(struct platform_device *pdev) for (i = 0; i <= priv->hw_params->rx_queues; i++) priv->rx_rings[i].rx_max_coalesced_frames = 
1; + /* Initialize u64 stats seq counter for 32bit machines */ + for (i = 0; i <= priv->hw_params->rx_queues; i++) + u64_stats_init(&priv->rx_rings[i].stats64.syncp); + for (i = 0; i <= priv->hw_params->tx_queues; i++) + u64_stats_init(&priv->tx_rings[i].stats64.syncp); + /* libphy will determine the link state */ netif_carrier_off(dev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index aebb9fef3f6e..1be2dc40a1a6 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1578,7 +1578,6 @@ napi_del: static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); - int orig_mtu = netdev->mtu; /* For now just support only the usual MTU sized frames, * plus some headroom for VLAN, QinQ. @@ -1589,15 +1588,10 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } - WRITE_ONCE(netdev->mtu, new_mtu); - - if (!netif_running(netdev)) - return 0; - - if (nicvf_update_hw_max_frs(nic, new_mtu)) { - netdev->mtu = orig_mtu; + if (netif_running(netdev) && nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - } + + WRITE_ONCE(netdev->mtu, new_mtu); return 0; } diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 773f5ad972a2..6bc8dfdb3d4b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1864,10 +1864,10 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; - if (netdev->mtu > enic->port_mtu) + if (new_mtu > enic->port_mtu) netdev_warn(netdev, "interface MTU (%d) set higher than port MTU (%d)\n", - netdev->mtu, enic->port_mtu); + new_mtu, enic->port_mtu); return _enic_change_mtu(netdev, new_mtu); } diff --git a/drivers/net/ethernet/faraday/Kconfig 
b/drivers/net/ethernet/faraday/Kconfig index c699bd6bcbb9..474073c7f94d 100644 --- a/drivers/net/ethernet/faraday/Kconfig +++ b/drivers/net/ethernet/faraday/Kconfig @@ -31,6 +31,7 @@ config FTGMAC100 depends on ARM || COMPILE_TEST depends on !64BIT || BROKEN select PHYLIB + select FIXED_PHY select MDIO_ASPEED if MACH_ASPEED_G6 select CRC32 help diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ec2c3dab250..b82f121cadad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3939,6 +3939,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv, MEM_TYPE_PAGE_ORDER0, NULL); if (err) { dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n"); + xdp_rxq_info_unreg(&fq->channel->xdp_rxq); return err; } @@ -4432,17 +4433,25 @@ static int dpaa2_eth_bind_dpni(struct dpaa2_eth_priv *priv) return -EINVAL; } if (err) - return err; + goto out; } err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX, &priv->tx_qdid); if (err) { dev_err(dev, "dpni_get_qdid() failed\n"); - return err; + goto out; } return 0; + +out: + while (i--) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } + return err; } /* Allocate rings for storing incoming frame descriptors */ @@ -4825,6 +4834,17 @@ static void dpaa2_eth_del_ch_napi(struct dpaa2_eth_priv *priv) } } +static void dpaa2_eth_free_rx_xdp_rxq(struct dpaa2_eth_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_fqs; i++) { + if (priv->fq[i].type == DPAA2_RX_FQ && + xdp_rxq_info_is_reg(&priv->fq[i].channel->xdp_rxq)) + xdp_rxq_info_unreg(&priv->fq[i].channel->xdp_rxq); + } +} + static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) { struct device *dev; @@ -5028,6 +5048,7 @@ err_alloc_percpu_extras: free_percpu(priv->percpu_stats); err_alloc_percpu_stats: dpaa2_eth_del_ch_napi(priv); + 
dpaa2_eth_free_rx_xdp_rxq(priv); err_bind: dpaa2_eth_free_dpbps(priv); err_dpbp_setup: @@ -5080,6 +5101,7 @@ static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) free_percpu(priv->percpu_extras); dpaa2_eth_del_ch_napi(priv); + dpaa2_eth_free_rx_xdp_rxq(priv); dpaa2_eth_free_dpbps(priv); dpaa2_eth_free_dpio(priv); dpaa2_eth_free_dpni(priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 4098f01479bc..53e8d18c7a34 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -507,7 +507,7 @@ static inline u64 _enetc_rd_reg64(void __iomem *reg) tmp = ioread32(reg + 4); } while (high != tmp); - return le64_to_cpu((__le64)high << 32 | low); + return (u64)high << 32 | low; } #endif diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index a189038d88df..246ddce753f9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -211,7 +211,6 @@ struct ibmvnic_statistics { u8 reserved[72]; } __packed __aligned(8); -#define NUM_TX_STATS 3 struct ibmvnic_tx_queue_stats { u64 batched_packets; u64 direct_packets; @@ -219,13 +218,18 @@ struct ibmvnic_tx_queue_stats { u64 dropped_packets; }; -#define NUM_RX_STATS 3 +#define NUM_TX_STATS \ + (sizeof(struct ibmvnic_tx_queue_stats) / sizeof(u64)) + struct ibmvnic_rx_queue_stats { u64 packets; u64 bytes; u64 interrupts; }; +#define NUM_RX_STATS \ + (sizeof(struct ibmvnic_rx_queue_stats) / sizeof(u64)) + struct ibmvnic_acl_buffer { __be32 len; __be32 version; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a96f4cfa6e17..7719e15813ee 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3534,9 +3534,6 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_cnp: case e1000_pch_tgp: case 
e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ @@ -3552,6 +3549,17 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) adapter->cc.shift = shift; } break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + /* System firmware can misreport this value, so set it to a + * stable 38400KHz frequency. + */ + incperiod = INCPERIOD_38400KHZ; + incvalue = INCVALUE_38400KHZ; + shift = INCVALUE_SHIFT_38400KHZ; + adapter->cc.shift = shift; + break; case e1000_82574: case e1000_82583: /* Stable 25MHz frequency */ diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 89d57dd911dc..ea3c3eb2ef20 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -295,15 +295,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; else adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 2bc5c7f59844..1f7834c03550 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -378,6 +378,50 @@ ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto) } /** + * ice_arfs_cmp - Check if aRFS filter matches this flow. + * @fltr_info: filter info of the saved ARFS entry. + * @fk: flow dissector keys. 
+ * @n_proto: One of htons(ETH_P_IP) or htons(ETH_P_IPV6). + * @ip_proto: One of IPPROTO_TCP or IPPROTO_UDP. + * + * Since this function assumes limited values for n_proto and ip_proto, it + * is meant to be called only from ice_rx_flow_steer(). + * + * Return: + * * true - fltr_info refers to the same flow as fk. + * * false - fltr_info and fk refer to different flows. + */ +static bool +ice_arfs_cmp(const struct ice_fdir_fltr *fltr_info, const struct flow_keys *fk, + __be16 n_proto, u8 ip_proto) +{ + /* Determine if the filter is for IPv4 or IPv6 based on flow_type, + * which is one of ICE_FLTR_PTYPE_NONF_IPV{4,6}_{TCP,UDP}. + */ + bool is_v4 = fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP || + fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP; + + /* Following checks are arranged in the quickest and most discriminative + * fields first for early failure. + */ + if (is_v4) + return n_proto == htons(ETH_P_IP) && + fltr_info->ip.v4.src_port == fk->ports.src && + fltr_info->ip.v4.dst_port == fk->ports.dst && + fltr_info->ip.v4.src_ip == fk->addrs.v4addrs.src && + fltr_info->ip.v4.dst_ip == fk->addrs.v4addrs.dst && + fltr_info->ip.v4.proto == ip_proto; + + return fltr_info->ip.v6.src_port == fk->ports.src && + fltr_info->ip.v6.dst_port == fk->ports.dst && + fltr_info->ip.v6.proto == ip_proto && + !memcmp(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); +} + +/** * ice_rx_flow_steer - steer the Rx flow to where application is being run * @netdev: ptr to the netdev being adjusted * @skb: buffer with required header information @@ -448,6 +492,10 @@ ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb, continue; fltr_info = &arfs_entry->fltr_info; + + if (!ice_arfs_cmp(fltr_info, &fk, n_proto, ip_proto)) + continue; + ret = fltr_info->fltr_id; if (fltr_info->q_index == rxq_idx || diff --git 
a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 6aae03771746..2e4f0969035f 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -508,10 +508,14 @@ err_create_repr: */ int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_repr *repr = ice_repr_create_vf(vf); struct devlink *devlink = priv_to_devlink(pf); + struct ice_repr *repr; int err; + if (!ice_is_eswitch_mode_switchdev(pf)) + return 0; + + repr = ice_repr_create_vf(vf); if (IS_ERR(repr)) return PTR_ERR(repr); diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c index b28991dd1870..48b8e184f3db 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq.c +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -96,7 +96,7 @@ static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) */ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) { - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* free ring buffers and the ring itself */ idpf_ctlq_dealloc_ring_res(hw, cq); @@ -104,8 +104,7 @@ static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) /* Set ring_size to 0 to indicate uninitialized queue */ cq->ring_size = 0; - mutex_unlock(&cq->cq_lock); - mutex_destroy(&cq->cq_lock); + spin_unlock(&cq->cq_lock); } /** @@ -173,7 +172,7 @@ int idpf_ctlq_add(struct idpf_hw *hw, idpf_ctlq_init_regs(hw, cq, is_rxq); - mutex_init(&cq->cq_lock); + spin_lock_init(&cq->cq_lock); list_add(&cq->cq_list, &hw->cq_list_head); @@ -272,7 +271,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, int err = 0; int i; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); /* Ensure there are enough descriptors to send all messages */ num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); @@ -332,7 +331,7 @@ int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, wr32(hw, 
cq->reg.tail, cq->next_to_use); err_unlock: - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); return err; } @@ -364,7 +363,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, if (*clean_count > cq->ring_size) return -EBADR; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -397,7 +396,7 @@ int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* Return number of descriptors actually cleaned */ *clean_count = i; @@ -435,7 +434,7 @@ int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, if (*buff_count > 0) buffs_avail = true; - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); if (tbp >= cq->ring_size) tbp = 0; @@ -524,7 +523,7 @@ post_buffs_out: wr32(hw, cq->reg.tail, cq->next_to_post); } - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); /* return the number of buffers that were not posted */ *buff_count = *buff_count - i; @@ -552,7 +551,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, u16 i; /* take the lock before we start messing with the ring */ - mutex_lock(&cq->cq_lock); + spin_lock(&cq->cq_lock); ntc = cq->next_to_clean; @@ -614,7 +613,7 @@ int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, cq->next_to_clean = ntc; - mutex_unlock(&cq->cq_lock); + spin_unlock(&cq->cq_lock); *num_q_msg = i; if (*num_q_msg == 0) diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h index 9642494a67d8..3414c5f9a831 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -99,7 +99,7 @@ struct idpf_ctlq_info { enum idpf_ctlq_type cq_type; int q_id; - struct mutex cq_lock; /* control queue lock */ + spinlock_t cq_lock; /* control queue lock */ /* used for interrupt processing */ u16 next_to_use; u16 next_to_clean; diff --git 
a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 9bdb309b668e..eaf7a2606faa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -47,7 +47,7 @@ static u32 idpf_get_rxfh_key_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; @@ -66,7 +66,7 @@ static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) struct idpf_vport_user_config_data *user_config; if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) - return -EOPNOTSUPP; + return 0; user_config = &np->adapter->vport_config[np->vport_idx]->user_config; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 4eb20ec2accb..80382ff4a5fa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2314,8 +2314,12 @@ void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) struct idpf_adapter *adapter = hw->back; size_t sz = ALIGN(size, 4096); - mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, - &mem->pa, GFP_KERNEL); + /* The control queue resources are freed under a spinlock, contiguous + * pages will avoid IOMMU remapping and the use vmap (and vunmap in + * dma_free_*() path. 
+ */ + mem->va = dma_alloc_attrs(&adapter->pdev->dev, sz, &mem->pa, + GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = sz; return mem->va; @@ -2330,8 +2334,8 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) { struct idpf_adapter *adapter = hw->back; - dma_free_coherent(&adapter->pdev->dev, mem->size, - mem->va, mem->pa); + dma_free_attrs(&adapter->pdev->dev, mem->size, + mem->va, mem->pa, DMA_ATTR_FORCE_CONTIGUOUS); mem->size = 0; mem->va = NULL; mem->pa = 0; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 686793c539f2..031c332f66c4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7115,6 +7115,10 @@ static int igc_probe(struct pci_dev *pdev, adapter->port_num = hw->bus.func; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + /* Disable ASPM L1.2 on I226 devices to avoid packet loss */ + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + err = pci_save_state(pdev); if (err) goto err_ioremap; @@ -7500,6 +7504,9 @@ static int __igc_resume(struct device *dev, bool rpm) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2); + if (igc_init_interrupt_scheme(adapter, true)) { netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; @@ -7625,6 +7632,9 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); + if (igc_is_device_id_i226(hw)) + pci_disable_link_state_locked(pdev, PCIE_LINK_STATE_L1_2); + /* In case of PCI error, adapter loses its HW address * so we should re-assign it here. 
*/ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6f572589f1e5..6b5c9536d26d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1822,7 +1822,7 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } @@ -1847,7 +1847,7 @@ int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index b5c3a2a9d2a5..9560fcba643f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -18,7 +18,8 @@ enum { enum { MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO + MLX5E_PROMISC_PRIO, + MLX5E_NIC_PRIO, }; struct mlx5e_flow_table { @@ -68,9 +69,13 @@ struct mlx5e_l2_table { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -/* NIC prio FTS */ +/* NIC promisc FT level */ enum { MLX5E_PROMISC_FT_LEVEL, +}; + +/* NIC prio FTS */ +enum { MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 298bb74ec5e9..d1d629697e28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); } else { __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state); - + synchronize_net(); mlx5e_dim_disable(rq->dim); rq->dim = NULL; } @@ -140,7 +140,7 @@ int 
mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); } else { __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); - + synchronize_net(); mlx5e_dim_disable(sq->dim); sq->dim = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 04a969128161..265c4ca85f7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -780,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; ft_attr.autogroup.max_num_groups = 1; ft_attr.level = MLX5E_PROMISC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.prio = MLX5E_PROMISC_PRIO; ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index b6ae384396b3..ad9f6fca9b6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -1076,6 +1076,7 @@ static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, return err; } esw_qos_node_set_parent(node, parent); + node->bw_share = 0; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index a8046200d376..3dd9a6f40709 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -113,13 +113,16 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, +/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 11 +#define 
KERNEL_NIC_PRIO_NUM_LEVELS 10 #define KERNEL_NIC_NUM_PRIOS 1 -/* One more level for tc */ -#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +/* One more level for tc, and one more for promisc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) + +#define KERNEL_NIC_PROMISC_NUM_PRIOS 1 +#define KERNEL_NIC_PROMISC_NUM_LEVELS 1 #define KERNEL_NIC_TC_NUM_PRIOS 1 #define KERNEL_NIC_TC_NUM_LEVELS 3 @@ -187,6 +190,8 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS, + KERNEL_NIC_PROMISC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index fb2e5b844c15..d76d7a945899 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -447,8 +447,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); - if (phy_irq < 0) { - dev_err(&pdev->dev, "Error getting PHY irq. 
Use polling instead"); + if (phy_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out; + } else if (phy_irq < 0) { phy_irq = PHY_POLL; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index e2368075ab8c..4521d0483d18 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -127,11 +127,8 @@ static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, return -EBUSY; addr = dma_map_single(fbd->dev, msg, PAGE_SIZE, direction); - if (dma_mapping_error(fbd->dev, addr)) { - free_page((unsigned long)msg); - + if (dma_mapping_error(fbd->dev, addr)) return -ENOSPC; - } mbx->buf_info[tail].msg = msg; mbx->buf_info[tail].addr = addr; diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h index e8d073bfa2ca..f33dc83c5700 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.h +++ b/drivers/net/ethernet/microchip/lan743x_ptp.h @@ -18,9 +18,9 @@ */ #define LAN743X_PTP_N_EVENT_CHAN 2 #define LAN743X_PTP_N_PEROUT LAN743X_PTP_N_EVENT_CHAN -#define LAN743X_PTP_N_EXTTS 4 -#define LAN743X_PTP_N_PPS 0 #define PCI11X1X_PTP_IO_MAX_CHANNELS 8 +#define LAN743X_PTP_N_EXTTS PCI11X1X_PTP_IO_MAX_CHANNELS +#define LAN743X_PTP_N_PPS 0 #define PTP_CMD_CTL_TIMEOUT_CNT 50 struct lan743x_adapter; diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 3504507477c6..52cf7112762c 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -31,6 +31,9 @@ static void mana_gd_init_pf_regs(struct pci_dev *pdev) gc->db_page_base = gc->bar0_va + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + gc->phys_db_page_base = gc->bar0_pa + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); sriov_base_va = gc->bar0_va + sriov_base_off; diff --git 
a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index daf1e82cb76b..0e60a6bef99a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -516,9 +516,9 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, unsigned long start_time; unsigned long max_wait; unsigned long duration; - int done = 0; bool fw_up; int opcode; + bool done; int err; /* Wait for dev cmd to complete, retrying if we get EAGAIN, @@ -526,6 +526,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, */ max_wait = jiffies + (max_seconds * HZ); try_again: + done = false; opcode = idev->opcode; start_time = jiffies; for (fw_up = ionic_is_fw_running(idev); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2ac59564ded1..d10b58ebf603 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -321,7 +321,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, len, DMA_TO_DEVICE); } else /* XDP_REDIRECT */ { dma_addr = ionic_tx_map_single(q, frame->data, len); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; } @@ -357,7 +357,7 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, } else { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (dma_mapping_error(q->dev, dma_addr)) { + if (dma_addr == DMA_MAPPING_ERROR) { ionic_tx_desc_unmap_bufs(q, desc_info); return -EIO; } @@ -1083,7 +1083,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, net_warn_ratelimited("%s: DMA single map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1100,7 +1100,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, 
net_warn_ratelimited("%s: DMA frag map failed on %s!\n", dev_name(dev), q->name); q_to_tx_stats(q)->dma_map_err++; - return 0; + return DMA_MAPPING_ERROR; } return dma_addr; } @@ -1116,7 +1116,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, int frag_idx; dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) return -EIO; buf_info->dma_addr = dma_addr; buf_info->len = skb_headlen(skb); @@ -1126,7 +1126,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, nfrags = skb_shinfo(skb)->nr_frags; for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (!dma_addr) + if (dma_addr == DMA_MAPPING_ERROR) goto dma_fail; buf_info->dma_addr = dma_addr; buf_info->len = skb_frag_size(frag); diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c index f55eed092f25..7d78f072b0a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c @@ -242,7 +242,7 @@ static int qed_mfw_get_tlv_group(u8 tlv_type, u8 *tlv_group) } /* Returns size of the data buffer or, -1 in case TLV data is not available. 
*/ -static int +static noinline_for_stack int qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_generic *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -304,7 +304,7 @@ qed_mfw_get_gen_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_eth_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_eth *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -438,7 +438,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time, return QED_MFW_TLV_TIME_SIZE; } -static int +static noinline_for_stack int qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_fcoe *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) @@ -1073,7 +1073,7 @@ qed_mfw_get_fcoe_tlv_value(struct qed_drv_tlv_hdr *p_tlv, return -1; } -static int +static noinline_for_stack int qed_mfw_get_iscsi_tlv_value(struct qed_drv_tlv_hdr *p_tlv, struct qed_mfw_tlv_iscsi *p_drv_buf, struct qed_tlv_parsed_buf *p_buf) diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 6b3f7fca8d15..05c4b6c8c9c3 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1259,7 +1259,12 @@ static int rtsn_probe(struct platform_device *pdev) priv = netdev_priv(ndev); priv->pdev = pdev; priv->ndev = ndev; + priv->ptp_priv = rcar_gen4_ptp_alloc(pdev); + if (!priv->ptp_priv) { + ret = -ENOMEM; + goto error_free; + } spin_lock_init(&priv->lock); platform_set_drvdata(pdev, priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 7840bc403788..5dcc95bc0ad2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -364,19 +364,17 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & XGMAC_NIS)) { - if (likely(intr_status & XGMAC_RI)) { - 
u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->rx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_rx; - } - if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&stats->syncp); - u64_stats_inc(&stats->tx_normal_irq_n[chan]); - u64_stats_update_end(&stats->syncp); - ret |= handle_tx; - } + if (likely(intr_status & XGMAC_RI)) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); + ret |= handle_tx; } /* Clear interrupts */ diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ddca8fc7883e..26119d02a94d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3336,7 +3336,7 @@ static int niu_rbr_add_page(struct niu *np, struct rx_ring_info *rp, addr = np->ops->map_page(np->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (!addr) { + if (np->ops->mapping_error(np->device, addr)) { __free_page(page); return -ENOMEM; } @@ -6676,6 +6676,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, len = skb_headlen(skb); mapping = np->ops->map_single(np->device, skb->data, len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_drop; prod = rp->prod; @@ -6717,6 +6719,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, mapping = np->ops->map_page(np->device, skb_frag_page(frag), skb_frag_off(frag), len, DMA_TO_DEVICE); + if (np->ops->mapping_error(np->device, mapping)) + goto out_unmap; rp->tx_buffs[prod].skb = NULL; rp->tx_buffs[prod].mapping = mapping; @@ -6741,6 +6745,19 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, out: return NETDEV_TX_OK; +out_unmap: + while (i--) { + const skb_frag_t *frag; + + prod = PREVIOUS_TX(rp, 
prod); + frag = &skb_shinfo(skb)->frags[i]; + np->ops->unmap_page(np->device, rp->tx_buffs[prod].mapping, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + np->ops->unmap_single(np->device, rp->tx_buffs[rp->prod].mapping, + skb_headlen(skb), DMA_TO_DEVICE); + out_drop: rp->tx_errors++; kfree_skb(skb); @@ -9644,6 +9661,11 @@ static void niu_pci_unmap_single(struct device *dev, u64 dma_address, dma_unmap_single(dev, dma_address, size, direction); } +static int niu_pci_mapping_error(struct device *dev, u64 addr) +{ + return dma_mapping_error(dev, addr); +} + static const struct niu_ops niu_pci_ops = { .alloc_coherent = niu_pci_alloc_coherent, .free_coherent = niu_pci_free_coherent, @@ -9651,6 +9673,7 @@ static const struct niu_ops niu_pci_ops = { .unmap_page = niu_pci_unmap_page, .map_single = niu_pci_map_single, .unmap_single = niu_pci_unmap_single, + .mapping_error = niu_pci_mapping_error, }; static void niu_driver_version(void) @@ -10019,6 +10042,11 @@ static void niu_phys_unmap_single(struct device *dev, u64 dma_address, /* Nothing to do. */ } +static int niu_phys_mapping_error(struct device *dev, u64 dma_address) +{ + return false; +} + static const struct niu_ops niu_phys_ops = { .alloc_coherent = niu_phys_alloc_coherent, .free_coherent = niu_phys_free_coherent, @@ -10026,6 +10054,7 @@ static const struct niu_ops niu_phys_ops = { .unmap_page = niu_phys_unmap_page, .map_single = niu_phys_map_single, .unmap_single = niu_phys_unmap_single, + .mapping_error = niu_phys_mapping_error, }; static int niu_of_probe(struct platform_device *op) diff --git a/drivers/net/ethernet/sun/niu.h b/drivers/net/ethernet/sun/niu.h index 04c215f91fc0..0b169c08b0f2 100644 --- a/drivers/net/ethernet/sun/niu.h +++ b/drivers/net/ethernet/sun/niu.h @@ -2879,6 +2879,9 @@ struct tx_ring_info { #define NEXT_TX(tp, index) \ (((index) + 1) < (tp)->pending ? ((index) + 1) : 0) +#define PREVIOUS_TX(tp, index) \ + (((index) - 1) >= 0 ? 
((index) - 1) : (((tp)->pending) - 1)) + static inline u32 niu_tx_avail(struct tx_ring_info *tp) { return (tp->pending - @@ -3140,6 +3143,7 @@ struct niu_ops { enum dma_data_direction direction); void (*unmap_single)(struct device *dev, u64 dma_address, size_t size, enum dma_data_direction direction); + int (*mapping_error)(struct device *dev, u64 dma_address); }; struct niu_link_config { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index f20d1ff192ef..231ca141331f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -856,8 +856,6 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, { struct sk_buff *skb; - len += AM65_CPSW_HEADROOM; - skb = build_skb(page_addr, len); if (unlikely(!skb)) return NULL; @@ -1344,7 +1342,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, } skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE, headroom); + PAGE_SIZE, headroom); if (unlikely(!skb)) { new_page = page; goto requeue; diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 5b8fdb882172..12f25cec6255 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -98,20 +98,11 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; - struct prueth_swdata *swdata; - struct page *page; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; - swdata = cppi5_hdesc_get_swdata(desc); - if (swdata->type == PRUETH_SWDATA_PAGE) { - page = swdata->data.page; - page_pool_recycle_direct(page->pp, swdata->data.page); - goto free_desc; - } - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -135,7 +126,6 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, 
k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } -free_desc: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -612,13 +602,8 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, xdpf->len, buf_dma, xdpf->len); swdata = cppi5_hdesc_get_swdata(first_desc); - if (page) { - swdata->type = PRUETH_SWDATA_PAGE; - swdata->data.page = page; - } else { - swdata->type = PRUETH_SWDATA_XDPF; - swdata->data.xdpf = xdpf; - } + swdata->type = PRUETH_SWDATA_XDPF; + swdata->data.xdpf = xdpf; /* Report BQL before sending the packet */ netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 7f2e6cddfeb1..55e252789db3 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1705,6 +1705,7 @@ static void wx_set_rss_queues(struct wx *wx) clear_bit(WX_FLAG_FDIR_HASH, wx->flags); + wx->ring_feature[RING_F_FDIR].indices = 1; /* Use Flow Director in addition to RSS to ensure the best * distribution of flows across cores, even when an FDIR flow * isn't matched. 
@@ -1746,7 +1747,7 @@ static void wx_set_num_queues(struct wx *wx) */ static int wx_acquire_msix_vectors(struct wx *wx) { - struct irq_affinity affd = { .pre_vectors = 1 }; + struct irq_affinity affd = { .post_vectors = 1 }; int nvecs, i; /* We start by asking for one vector per queue pair */ @@ -1783,16 +1784,24 @@ static int wx_acquire_msix_vectors(struct wx *wx) return nvecs; } - wx->msix_entry->entry = 0; - wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); nvecs -= 1; for (i = 0; i < nvecs; i++) { wx->msix_q_entries[i].entry = i; - wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i + 1); + wx->msix_q_entries[i].vector = pci_irq_vector(wx->pdev, i); } wx->num_q_vectors = nvecs; + wx->msix_entry->entry = nvecs; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, nvecs); + + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) { + wx->msix_entry->entry = 0; + wx->msix_entry->vector = pci_irq_vector(wx->pdev, 0); + wx->msix_q_entries[0].entry = 0; + wx->msix_q_entries[0].vector = pci_irq_vector(wx->pdev, 1); + } + return 0; } @@ -2291,6 +2300,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction, if (direction == -1) { /* other causes */ + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) + msix_vector = 0; msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = 0; ivar = rd32(wx, WX_PX_MISC_IVAR); @@ -2299,8 +2310,6 @@ static void wx_set_ivar(struct wx *wx, s8 direction, wr32(wx, WX_PX_MISC_IVAR, ivar); } else { /* tx or rx causes */ - if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7)) - msix_vector += 1; /* offset for queue vectors */ msix_vector |= WX_PX_IVAR_ALLOC_VAL; index = ((16 * (queue & 1)) + (8 * direction)); ivar = rd32(wx, WX_PX_IVAR(queue >> 1)); @@ -2339,7 +2348,7 @@ void wx_write_eitr(struct wx_q_vector *q_vector) itr_reg |= WX_PX_ITR_CNT_WDIS; - wr32(wx, WX_PX_ITR(v_idx + 1), itr_reg); + wr32(wx, WX_PX_ITR(v_idx), itr_reg); } /** @@ -2392,9 +2401,9 @@ void wx_configure_vectors(struct wx *wx) wx_write_eitr(q_vector); } - wx_set_ivar(wx, -1, 
0, 0); + wx_set_ivar(wx, -1, 0, v_idx); if (pdev->msix_enabled) - wr32(wx, WX_PX_ITR(0), 1950); + wr32(wx, WX_PX_ITR(v_idx), 1950); } EXPORT_SYMBOL(wx_configure_vectors); @@ -2623,7 +2632,7 @@ static int wx_alloc_page_pool(struct wx_ring *rx_ring) struct page_pool_params pp_params = { .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .order = 0, - .pool_size = rx_ring->size, + .pool_size = rx_ring->count, .nid = dev_to_node(rx_ring->dev), .dev = rx_ring->dev, .dma_dir = DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index e8656d9d733b..c82ae137756c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -64,6 +64,7 @@ static void wx_sriov_clear_data(struct wx *wx) wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0); wx->ring_feature[RING_F_VMDQ].offset = 0; + clear_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); /* Disable VMDq flag so device will be set in NM mode */ if (wx->ring_feature[RING_F_VMDQ].limit == 1) @@ -78,6 +79,9 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs); + if (num_vfs == 7 && wx->mac.type == wx_mac_em) + set_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags); + /* Enable VMDq flag so device will be set in VM mode */ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); if (!wx->ring_feature[RING_F_VMDQ].limit) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 7730c9fc3e02..c363379126c0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -1191,6 +1191,7 @@ enum wx_pf_flags { WX_FLAG_VMDQ_ENABLED, WX_FLAG_VLAN_PROMISC, WX_FLAG_SRIOV_ENABLED, + WX_FLAG_IRQ_VECTOR_SHARED, WX_FLAG_FDIR_CAPABLE, WX_FLAG_FDIR_HASH, WX_FLAG_FDIR_PERFECT, @@ -1343,7 +1344,7 @@ 
struct wx { }; #define WX_INTR_ALL (~0ULL) -#define WX_INTR_Q(i) BIT((i) + 1) +#define WX_INTR_Q(i) BIT((i)) /* register operations */ #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index b5022c49dc5e..e0fc897b0a58 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -161,7 +161,7 @@ static void ngbe_irq_enable(struct wx *wx, bool queues) if (queues) wx_intr_enable(wx, NGBE_INTR_ALL); else - wx_intr_enable(wx, NGBE_INTR_MISC); + wx_intr_enable(wx, NGBE_INTR_MISC(wx)); } /** @@ -286,7 +286,7 @@ static int ngbe_request_msix_irqs(struct wx *wx) * for queue. But when num_vfs == 7, vector[1] is assigned to vf6. * Misc and queue should reuse interrupt vector[0]. */ - if (wx->num_vfs == 7) + if (test_bit(WX_FLAG_IRQ_VECTOR_SHARED, wx->flags)) err = request_irq(wx->msix_entry->vector, ngbe_misc_and_queue, 0, netdev->name, wx); else diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index bb74263f0498..3b2ca7f47e33 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -87,7 +87,7 @@ #define NGBE_PX_MISC_IC_TIMESYNC BIT(11) /* time sync */ #define NGBE_INTR_ALL 0x1FF -#define NGBE_INTR_MISC BIT(0) +#define NGBE_INTR_MISC(A) BIT((A)->msix_entry->entry) #define NGBE_PHY_CONFIG(reg_offset) (0x14000 + ((reg_offset) * 4)) #define NGBE_CFG_LAN_SPEED 0x14440 diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c index 7dbcf41750c1..dc87ccad9652 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_aml.c @@ -294,6 +294,7 @@ static void txgbe_mac_link_up_aml(struct phylink_config *config, wx_fc_enable(wx, tx_pause, rx_pause); txgbe_reconfig_mac(wx); + 
txgbe_enable_sec_tx_path(wx); txcfg = rd32(wx, TXGBE_AML_MAC_TX_CFG); txcfg &= ~TXGBE_AML_MAC_TX_CFG_SPEED_MASK; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c index 20b9a28bcb55..3885283681ec 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c @@ -31,7 +31,7 @@ void txgbe_irq_enable(struct wx *wx, bool queues) wr32(wx, WX_PX_MISC_IEN, misc_ien); /* unmask interrupt */ - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) wx_intr_enable(wx, TXGBE_INTR_QALL(wx)); } @@ -78,7 +78,6 @@ free_queue_irqs: free_irq(wx->msix_q_entries[vector].vector, wx->q_vector[vector]); } - wx_reset_interrupt_capability(wx); return err; } @@ -132,7 +131,7 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data) txgbe->eicr = eicr; if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) { wx_msg_task(txgbe->wx); - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); } return IRQ_WAKE_THREAD; } @@ -184,7 +183,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data) nhandled++; } - wx_intr_enable(wx, TXGBE_INTR_MISC); + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); return (nhandled > 0 ? 
IRQ_HANDLED : IRQ_NONE); } @@ -211,6 +210,7 @@ void txgbe_free_misc_irq(struct txgbe *txgbe) free_irq(txgbe->link_irq, txgbe); free_irq(txgbe->misc.irq, txgbe); txgbe_del_irq_domain(txgbe); + txgbe->wx->misc_irq_domain = false; } int txgbe_setup_misc_irq(struct txgbe *txgbe) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index f3d2778b8e35..a5867f3c93fc 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -458,10 +458,14 @@ static int txgbe_open(struct net_device *netdev) wx_configure(wx); - err = txgbe_request_queue_irqs(wx); + err = txgbe_setup_misc_irq(wx->priv); if (err) goto err_free_resources; + err = txgbe_request_queue_irqs(wx); + if (err) + goto err_free_misc_irq; + /* Notify the stack of the actual queue counts. */ err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues); if (err) @@ -479,6 +483,9 @@ static int txgbe_open(struct net_device *netdev) err_free_irq: wx_free_irq(wx); +err_free_misc_irq: + txgbe_free_misc_irq(wx->priv); + wx_reset_interrupt_capability(wx); err_free_resources: wx_free_resources(wx); err_reset: @@ -519,6 +526,7 @@ static int txgbe_close(struct net_device *netdev) wx_ptp_stop(wx); txgbe_down(wx); wx_free_irq(wx); + txgbe_free_misc_irq(wx->priv); wx_free_resources(wx); txgbe_fdir_filter_exit(wx); wx_control_hw(wx, false); @@ -564,7 +572,6 @@ static void txgbe_shutdown(struct pci_dev *pdev) int txgbe_setup_tc(struct net_device *dev, u8 tc) { struct wx *wx = netdev_priv(dev); - struct txgbe *txgbe = wx->priv; /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. 
Unfortunately, the @@ -575,7 +582,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) else txgbe_reset(wx); - txgbe_free_misc_irq(txgbe); wx_clear_interrupt_scheme(wx); if (tc) @@ -584,7 +590,6 @@ int txgbe_setup_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); wx_init_interrupt_scheme(wx); - txgbe_setup_misc_irq(txgbe); if (netif_running(dev)) txgbe_open(dev); @@ -882,13 +887,9 @@ static int txgbe_probe(struct pci_dev *pdev, txgbe_init_fdir(txgbe); - err = txgbe_setup_misc_irq(txgbe); - if (err) - goto err_release_hw; - err = txgbe_init_phy(txgbe); if (err) - goto err_free_misc_irq; + goto err_release_hw; err = register_netdev(netdev); if (err) @@ -916,8 +917,6 @@ static int txgbe_probe(struct pci_dev *pdev, err_remove_phy: txgbe_remove_phy(txgbe); -err_free_misc_irq: - txgbe_free_misc_irq(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -957,7 +956,6 @@ static void txgbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); txgbe_remove_phy(txgbe); - txgbe_free_misc_irq(txgbe); wx_free_isb_resources(wx); pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 42ec815159e8..41915d7dd372 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -302,8 +302,8 @@ struct txgbe_fdir_filter { #define TXGBE_DEFAULT_RX_WORK 128 #endif -#define TXGBE_INTR_MISC BIT(0) -#define TXGBE_INTR_QALL(A) GENMASK((A)->num_q_vectors, 1) +#define TXGBE_INTR_MISC(A) BIT((A)->num_q_vectors) +#define TXGBE_INTR_QALL(A) (TXGBE_INTR_MISC(A) - 1) #define TXGBE_MAX_EITR GENMASK(11, 3) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index edb36ff07a0c..6f82203a414c 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1309,7 +1309,7 @@ ll_temac_ethtools_set_ringparam(struct net_device 
*ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 13570f628aa5..dc8634e7bcbe 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -332,7 +332,7 @@ static void lan88xx_link_change_notify(struct phy_device *phydev) * As workaround, set to 10 before setting to 100 * at forced 100 F/H mode. */ - if (!phydev->autoneg && phydev->speed == 100) { + if (phydev->state == PHY_NOLINK && !phydev->autoneg && phydev->speed == 100) { /* disable phy interrupt */ temp = phy_read(phydev, LAN88XX_INT_MASK); temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; @@ -488,6 +488,7 @@ static struct phy_driver microchip_phy_driver[] = { .config_init = lan88xx_config_init, .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, + .soft_reset = genphy_soft_reset, /* Interrupt handling is broken, do not define related * functions to force polling. 
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c index 26350b962890..8f26e395e39f 100644 --- a/drivers/net/phy/qcom/at803x.c +++ b/drivers/net/phy/qcom/at803x.c @@ -26,9 +26,6 @@ #define AT803X_LED_CONTROL 0x18 -#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 -#define AT803X_WOL_EN BIT(5) - #define AT803X_REG_CHIP_CONFIG 0x1f #define AT803X_BT_BX_REG_SEL 0x8000 @@ -866,30 +863,6 @@ static int at8031_config_init(struct phy_device *phydev) return at803x_config_init(phydev); } -static int at8031_set_wol(struct phy_device *phydev, - struct ethtool_wolinfo *wol) -{ - int ret; - - /* First setup MAC address and enable WOL interrupt */ - ret = at803x_set_wol(phydev, wol); - if (ret) - return ret; - - if (wol->wolopts & WAKE_MAGIC) - /* Enable WOL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - 0, AT803X_WOL_EN); - else - /* Disable WoL function for 1588 */ - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - AT803X_PHY_MMD3_WOL_CTRL, - AT803X_WOL_EN, 0); - - return ret; -} - static int at8031_config_intr(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; diff --git a/drivers/net/phy/qcom/qca808x.c b/drivers/net/phy/qcom/qca808x.c index 71498c518f0f..6de16c0eaa08 100644 --- a/drivers/net/phy/qcom/qca808x.c +++ b/drivers/net/phy/qcom/qca808x.c @@ -633,7 +633,7 @@ static struct phy_driver qca808x_driver[] = { .handle_interrupt = at803x_handle_interrupt, .get_tunable = at803x_get_tunable, .set_tunable = at803x_set_tunable, - .set_wol = at803x_set_wol, + .set_wol = at8031_set_wol, .get_wol = at803x_get_wol, .get_features = qca808x_get_features, .config_aneg = qca808x_config_aneg, diff --git a/drivers/net/phy/qcom/qcom-phy-lib.c b/drivers/net/phy/qcom/qcom-phy-lib.c index d28815ef56bb..af7d0d8e81be 100644 --- a/drivers/net/phy/qcom/qcom-phy-lib.c +++ b/drivers/net/phy/qcom/qcom-phy-lib.c @@ -115,6 +115,31 @@ int at803x_set_wol(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(at803x_set_wol); +int 
at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + /* First setup MAC address and enable WOL interrupt */ + ret = at803x_set_wol(phydev, wol); + if (ret) + return ret; + + if (wol->wolopts & WAKE_MAGIC) + /* Enable WOL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + 0, AT803X_WOL_EN); + else + /* Disable WoL function for 1588 */ + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_PHY_MMD3_WOL_CTRL, + AT803X_WOL_EN, 0); + + return ret; +} +EXPORT_SYMBOL_GPL(at8031_set_wol); + void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) { diff --git a/drivers/net/phy/qcom/qcom.h b/drivers/net/phy/qcom/qcom.h index 4bb541728846..7f7151c8baca 100644 --- a/drivers/net/phy/qcom/qcom.h +++ b/drivers/net/phy/qcom/qcom.h @@ -172,6 +172,9 @@ #define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B #define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A +#define AT803X_PHY_MMD3_WOL_CTRL 0x8012 +#define AT803X_WOL_EN BIT(5) + #define AT803X_DEBUG_ADDR 0x1D #define AT803X_DEBUG_DATA 0x1E @@ -215,6 +218,8 @@ int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg, int at803x_debug_reg_write(struct phy_device *phydev, u16 reg, u16 data); int at803x_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +int at8031_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol); void at803x_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); int at803x_ack_interrupt(struct phy_device *phydev); diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 31463b9e5697..b6489da5cfcd 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -155,10 +155,29 @@ static int smsc_phy_reset(struct phy_device *phydev) static int lan87xx_config_aneg(struct phy_device *phydev) { - int rc; + u8 mdix_ctrl; int val; + int rc; + + /* When auto-negotiation is disabled (forced mode), the PHY's + * Auto-MDIX will continue toggling the TX/RX pairs. 
+ * + * To establish a stable link, we must select a fixed MDI mode. + * If the user has not specified a fixed MDI mode (i.e., mdix_ctrl is + * 'auto'), we default to ETH_TP_MDI. This choice of a ETH_TP_MDI mode + * mirrors the behavior the hardware would exhibit if the AUTOMDIX_EN + * strap were configured for a fixed MDI connection. + */ + if (phydev->autoneg == AUTONEG_DISABLE) { + if (phydev->mdix_ctrl == ETH_TP_MDI_AUTO) + mdix_ctrl = ETH_TP_MDI; + else + mdix_ctrl = phydev->mdix_ctrl; + } else { + mdix_ctrl = phydev->mdix_ctrl; + } - switch (phydev->mdix_ctrl) { + switch (mdix_ctrl) { case ETH_TP_MDI: val = SPECIAL_CTRL_STS_OVRRD_AMDIX_; break; @@ -167,7 +186,8 @@ static int lan87xx_config_aneg(struct phy_device *phydev) SPECIAL_CTRL_STS_AMDIX_STATE_; break; case ETH_TP_MDI_AUTO: - val = SPECIAL_CTRL_STS_AMDIX_ENABLE_; + val = SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_; break; default: return genphy_config_aneg(phydev); @@ -183,7 +203,7 @@ static int lan87xx_config_aneg(struct phy_device *phydev) rc |= val; phy_write(phydev, SPECIAL_CTRL_STS, rc); - phydev->mdix = phydev->mdix_ctrl; + phydev->mdix = mdix_ctrl; return genphy_config_aneg(phydev); } @@ -261,6 +281,33 @@ int lan87xx_read_status(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(lan87xx_read_status); +static int lan87xx_phy_config_init(struct phy_device *phydev) +{ + int rc; + + /* The LAN87xx PHY's initial MDI-X mode is determined by the AUTOMDIX_EN + * hardware strap, but the driver cannot read the strap's status. This + * creates an unpredictable initial state. + * + * To ensure consistent and reliable behavior across all boards, + * override the strap configuration on initialization and force the PHY + * into a known state with Auto-MDIX enabled, which is the expected + * default for modern hardware. 
+ */ + rc = phy_modify(phydev, SPECIAL_CTRL_STS, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_, + SPECIAL_CTRL_STS_OVRRD_AMDIX_ | + SPECIAL_CTRL_STS_AMDIX_ENABLE_); + if (rc < 0) + return rc; + + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + + return smsc_phy_config_init(phydev); +} + static int lan874x_phy_config_init(struct phy_device *phydev) { u16 val; @@ -695,7 +742,7 @@ static struct phy_driver smsc_phy_driver[] = { /* basic functions */ .read_status = lan87xx_read_status, - .config_init = smsc_phy_config_init, + .config_init = lan87xx_phy_config_init, .soft_reset = smsc_phy_reset, .config_aneg = lan87xx_config_aneg, diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f53e255116ea..e3ca6e91efe1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4567,8 +4567,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) if (!dev) return; - netif_napi_del(&dev->napi); - udev = interface_to_usbdev(intf); net = dev->net; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b586b1c13a47..f5647ee0adde 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1426,6 +1426,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ + {QMI_QUIRK_SET_DTR(0x1e0e, 0x9071, 3)}, /* SIMCom 8230C ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e53ba600605a..5d674eb9a0f2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -778,6 +778,26 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) return 
(unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1); } +static int check_mergeable_len(struct net_device *dev, void *mrg_ctx, + unsigned int len) +{ + unsigned int headroom, tailroom, room, truesize; + + truesize = mergeable_ctx_to_truesize(mrg_ctx); + headroom = mergeable_ctx_to_headroom(mrg_ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + if (len > truesize - room) { + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + DEV_STATS_INC(dev, rx_length_errors); + return -1; + } + + return 0; +} + static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen, unsigned int headroom, unsigned int len) @@ -1084,7 +1104,7 @@ static bool tx_may_stop(struct virtnet_info *vi, * Since most packets only take 1 or 2 ring slots, stopping the queue * early means 16 slots are typically wasted. */ - if (sq->vq->num_free < 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free < MAX_SKB_FRAGS + 2) { struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum); netif_tx_stop_queue(txq); @@ -1116,7 +1136,7 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. 
*/ free_old_xmit(sq, txq, false); - if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { netif_start_subqueue(dev, qnum); u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -1127,15 +1147,29 @@ static void check_sq_full_and_disable(struct virtnet_info *vi, } } +/* Note that @len is the length of received data without virtio header */ static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi, - struct receive_queue *rq, void *buf, u32 len) + struct receive_queue *rq, void *buf, + u32 len, bool first_buf) { struct xdp_buff *xdp; u32 bufsize; xdp = (struct xdp_buff *)buf; - bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool) + vi->hdr_len; + /* In virtnet_add_recvbuf_xsk, we use part of XDP_PACKET_HEADROOM for + * virtio header and ask the vhost to fill data from + * hard_start + XDP_PACKET_HEADROOM - vi->hdr_len + * The first buffer has virtio header so the remaining region for frame + * data is + * xsk_pool_get_rx_frame_size() + * While other buffers than the first one do not have virtio header, so + * the maximum frame data's length can be + * xsk_pool_get_rx_frame_size() + vi->hdr_len + */ + bufsize = xsk_pool_get_rx_frame_size(rq->xsk_pool); + if (!first_buf) + bufsize += vi->hdr_len; if (unlikely(len > bufsize)) { pr_debug("%s: rx error: len %u exceeds truesize %u\n", @@ -1260,7 +1294,7 @@ static int xsk_append_merge_buffer(struct virtnet_info *vi, u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, false); if (!xdp) goto err; @@ -1358,7 +1392,7 @@ static void virtnet_receive_xsk_buf(struct virtnet_info *vi, struct receive_queu u64_stats_add(&stats->bytes, len); - xdp = buf_to_xdp(vi, rq, buf, len); + xdp = buf_to_xdp(vi, rq, buf, len, true); if (!xdp) return; @@ -1797,7 +1831,8 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi) * across multiple buffers (num_buf > 1), and we make sure buffers * have enough headroom. 
*/ -static struct page *xdp_linearize_page(struct receive_queue *rq, +static struct page *xdp_linearize_page(struct net_device *dev, + struct receive_queue *rq, int *num_buf, struct page *p, int offset, @@ -1817,18 +1852,27 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, memcpy(page_address(page) + page_off, page_address(p) + offset, *len); page_off += *len; + /* Only mergeable mode can go inside this while loop. In small mode, + * *num_buf == 1, so it cannot go inside. + */ while (--*num_buf) { unsigned int buflen; void *buf; + void *ctx; int off; - buf = virtnet_rq_get_buf(rq, &buflen, NULL); + buf = virtnet_rq_get_buf(rq, &buflen, &ctx); if (unlikely(!buf)) goto err_buf; p = virt_to_head_page(buf); off = buf - page_address(p); + if (check_mergeable_len(dev, ctx, buflen)) { + put_page(p); + goto err_buf; + } + /* guard against a misconfigured or uncooperative backend that * is sending packet larger than the MTU. */ @@ -1917,7 +1961,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev, headroom = vi->hdr_len + header_offset; buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - xdp_page = xdp_linearize_page(rq, &num_buf, page, + xdp_page = xdp_linearize_page(dev, rq, &num_buf, page, offset, header_offset, &tlen); if (!xdp_page) @@ -2126,10 +2170,9 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; - unsigned int headroom, tailroom, room; - unsigned int truesize, cur_frag_size; struct skb_shared_info *shinfo; unsigned int xdp_frags_truesz = 0; + unsigned int truesize; struct page *page; skb_frag_t *frag; int offset; @@ -2172,21 +2215,14 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, page = virt_to_head_page(buf); offset = buf - page_address(page); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - - cur_frag_size = truesize; - xdp_frags_truesz += cur_frag_size; - if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + if (check_mergeable_len(dev, ctx, len)) { put_page(page); - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); goto err; } + truesize = mergeable_ctx_to_truesize(ctx); + xdp_frags_truesz += truesize; + frag = &shinfo->frags[shinfo->nr_frags++]; skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) @@ -2252,7 +2288,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi, */ if (!xdp_prog->aux->xdp_has_frags) { /* linearize data for XDP */ - xdp_page = xdp_linearize_page(rq, num_buf, + xdp_page = xdp_linearize_page(vi->dev, rq, num_buf, *page, offset, XDP_PACKET_HEADROOM, len); @@ -2400,18 +2436,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct sk_buff *head_skb, *curr_skb; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; - unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); head_skb = NULL; u64_stats_add(&stats->bytes, len - vi->hdr_len); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } if (unlikely(vi->xdp_enabled)) { struct bpf_prog *xdp_prog; @@ -2446,17 +2476,10 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, u64_stats_add(&stats->bytes, len); page = virt_to_head_page(buf); - truesize = mergeable_ctx_to_truesize(ctx); - headroom = mergeable_ctx_to_headroom(ctx); - tailroom = headroom ? 
sizeof(struct skb_shared_info) : 0; - room = SKB_DATA_ALIGN(headroom + tailroom); - if (unlikely(len > truesize - room)) { - pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)(truesize - room)); - DEV_STATS_INC(dev, rx_length_errors); + if (check_mergeable_len(dev, ctx, len)) goto err_skb; - } + truesize = mergeable_ctx_to_truesize(ctx); curr_skb = virtnet_skb_append_frag(head_skb, curr_skb, page, buf, len, truesize); if (!curr_skb) @@ -2998,7 +3021,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq, int budget) free_old_xmit(sq, txq, !!budget); } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq))); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3195,7 +3218,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) else free_old_xmit(sq, txq, !!budget); - if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) { + if (sq->vq->num_free >= MAX_SKB_FRAGS + 2) { if (netif_tx_queue_stopped(txq)) { u64_stats_update_begin(&sq->stats.syncp); u64_stats_inc(&sq->stats.wake); @@ -3481,6 +3504,12 @@ static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq, { int qindex, err; + if (ring_num <= MAX_SKB_FRAGS + 2) { + netdev_err(vi->dev, "tx size (%d) cannot be smaller than %d\n", + ring_num, MAX_SKB_FRAGS + 2); + return -EINVAL; + } + qindex = sq - vi->sq; virtnet_tx_pause(vi, sq); diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index ebc0560d40e3..89ae80934b30 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1216,6 +1216,7 @@ void ath12k_fw_stats_init(struct ath12k *ar) INIT_LIST_HEAD(&ar->fw_stats.pdevs); INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath12k_fw_stats_free(struct 
ath12k_fw_stats *stats) @@ -1228,8 +1229,9 @@ void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) void ath12k_fw_stats_reset(struct ath12k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats.fw_stats_done = false; ath12k_fw_stats_free(&ar->fw_stats); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 941db6e49d6e..7bcd9c70309f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -601,6 +601,12 @@ struct ath12k_sta { #define ATH12K_NUM_CHANS 101 #define ATH12K_MAX_5GHZ_CHAN 173 +static inline bool ath12k_is_2ghz_channel_freq(u32 freq) +{ + return freq >= ATH12K_MIN_2GHZ_FREQ && + freq <= ATH12K_MAX_2GHZ_FREQ; +} + enum ath12k_hw_state { ATH12K_HW_STATE_OFF, ATH12K_HW_STATE_ON, @@ -626,7 +632,8 @@ struct ath12k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; - bool fw_stats_done; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath12k_dbg_htt_stats { @@ -806,6 +813,7 @@ struct ath12k { bool regdom_set_by_user; struct completion fw_stats_complete; + struct completion fw_stats_done; struct completion mlo_setup_done; u32 mlo_setup_status; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index dd624d73b8b2..23da93afaa5c 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -1251,64 +1251,6 @@ void ath12k_debugfs_soc_destroy(struct ath12k_base *ab) */ } -void -ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ - struct ath12k_base *ab = ar->ab; - struct ath12k_pdev *pdev; - bool is_end; - static unsigned int num_vdev, num_bcn; - size_t total_vdevs_started = 0; - int i; - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath12k_warn(ab, "empty vdev stats"); - 
return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - rcu_read_lock(); - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += pdev->ar->num_started_vdevs; - } - rcu_read_unlock(); - - is_end = ((++num_vdev) == total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_vdev = 0; - } - return; - } - if (stats->stats_id == WMI_REQUEST_BCN_STAT) { - if (list_empty(&stats->bcn)) { - ath12k_warn(ab, "empty beacon stats"); - return; - } - /* Mark end until we reached the count of all started VDEVs - * within the PDEV - */ - is_end = ((++num_bcn) == ar->num_started_vdevs); - - list_splice_tail_init(&stats->bcn, - &ar->fw_stats.bcn); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_bcn = 0; - } - } -} - static int ath12k_open_vdev_stats(struct inode *inode, struct file *file) { struct ath12k *ar = inode->i_private; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.h b/drivers/net/wireless/ath/ath12k/debugfs.h index ebef7dace344..21641a8a0346 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.h +++ b/drivers/net/wireless/ath/ath12k/debugfs.h @@ -12,8 +12,6 @@ void ath12k_debugfs_soc_create(struct ath12k_base *ab); void ath12k_debugfs_soc_destroy(struct ath12k_base *ab); void ath12k_debugfs_register(struct ath12k *ar); void ath12k_debugfs_unregister(struct ath12k *ar); -void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats); void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void ath12k_debugfs_pdev_create(struct ath12k_base *ab); @@ -126,11 +124,6 @@ static inline void ath12k_debugfs_unregister(struct ath12k *ar) { } -static inline void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ -} - static inline bool 
ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar) { return false; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 88b59f3ff87a..59ec422992d3 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4360,7 +4360,7 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, { struct ath12k_base *ab = ar->ab; struct ath12k_hw *ah = ath12k_ar_to_ah(ar); - unsigned long timeout, time_left; + unsigned long time_left; int ret; guard(mutex)(&ah->hw_mutex); @@ -4368,19 +4368,13 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, if (ah->state != ATH12K_HW_STATE_ON) return -ENETDOWN; - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + msecs_to_jiffies(3 * 1000); ath12k_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id, param->vdev_id, param->pdev_id); - if (ret) { ath12k_warn(ab, "failed to request fw stats: %d\n", ret); return ret; @@ -4391,7 +4385,6 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, param->pdev_id, param->vdev_id, param->stats_id); time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) { ath12k_warn(ab, "time out while waiting for get fw stats\n"); return -ETIMEDOUT; @@ -4400,20 +4393,15 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back * when stats data buffer limit is reached. fw_stats_complete * is completed once host receives first event from firmware, but - * still end might not be marked in the TLV. - * Below loop is to confirm that firmware completed sending all the event - * and fw_stats_done is marked true when end is marked in the TLV. 
+ * still there could be more events following. Below is to wait + * until firmware completes sending all the events. */ - for (;;) { - if (time_after(jiffies, timeout)) - break; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats.fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) { + ath12k_warn(ab, "time out while waiting for fw stats done\n"); + return -ETIMEDOUT; } + return 0; } @@ -5890,6 +5878,327 @@ exit: return ret; } +static bool ath12k_mac_is_freq_on_mac(struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq, u8 mac_id) +{ + return (freq >= freq_range[mac_id].low_2ghz_freq && + freq <= freq_range[mac_id].high_2ghz_freq) || + (freq >= freq_range[mac_id].low_5ghz_freq && + freq <= freq_range[mac_id].high_5ghz_freq); +} + +static bool +ath12k_mac_2_freq_same_mac_in_freq_range(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq_link1, u32 freq_link2) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) { + if (ath12k_mac_is_freq_on_mac(freq_range, freq_link1, i) && + ath12k_mac_is_freq_on_mac(freq_range, freq_link2, i)) + return true; + } + + return false; +} + +static bool ath12k_mac_is_hw_dbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_dbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_dbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return true; + + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[ATH12K_HW_MODE_DBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, freq_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_is_hw_sbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + 
ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_sbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_sbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *sbs_uppr_share; + struct ath12k_hw_mode_freq_range_arg *sbs_low_share; + struct ath12k_hw_mode_freq_range_arg *sbs_range; + + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return true; + + if (ab->wmi_ab.sbs_lower_band_end_freq) { + sbs_uppr_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + sbs_low_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_low_share, + freq_link1, freq_link2) || + ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_uppr_share, + freq_link1, freq_link2); + } + + sbs_range = info->freq_range_caps[ATH12K_HW_MODE_SBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_freqs_on_same_mac(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + return ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_link1, freq_link2) && + ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_link1, freq_link2); +} + +static int ath12k_mac_mlo_sta_set_link_active(struct ath12k_base *ab, + enum wmi_mlo_link_force_reason reason, + enum wmi_mlo_link_force_mode mode, + u8 *mlo_vdev_id_lst, + u8 num_mlo_vdev, + u8 *mlo_inactive_vdev_lst, + u8 num_mlo_inactive_vdev) +{ + struct wmi_mlo_link_set_active_arg param = {0}; + u32 entry_idx, entry_offset, vdev_idx; + u8 vdev_id; + + param.reason = reason; + param.force_mode = mode; + + for (vdev_idx = 0; vdev_idx < num_mlo_vdev; vdev_idx++) { + vdev_id = mlo_vdev_id_lst[vdev_idx]; + entry_idx = vdev_id / 32; + entry_offset = vdev_id % 32; + if (entry_idx >= WMI_MLO_LINK_NUM_SZ) { + ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d", + entry_idx, num_mlo_vdev, vdev_id); + return -EINVAL; + } + 
param.vdev_bitmap[entry_idx] |= 1 << entry_offset; + /* update entry number if entry index changed */ + if (param.num_vdev_bitmap < entry_idx + 1) + param.num_vdev_bitmap = entry_idx + 1; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, + "num_vdev_bitmap %d vdev_bitmap[0] = 0x%x, vdev_bitmap[1] = 0x%x", + param.num_vdev_bitmap, param.vdev_bitmap[0], param.vdev_bitmap[1]); + + if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) { + for (vdev_idx = 0; vdev_idx < num_mlo_inactive_vdev; vdev_idx++) { + vdev_id = mlo_inactive_vdev_lst[vdev_idx]; + entry_idx = vdev_id / 32; + entry_offset = vdev_id % 32; + if (entry_idx >= WMI_MLO_LINK_NUM_SZ) { + ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d", + entry_idx, num_mlo_inactive_vdev, vdev_id); + return -EINVAL; + } + param.inactive_vdev_bitmap[entry_idx] |= 1 << entry_offset; + /* update entry number if entry index changed */ + if (param.num_inactive_vdev_bitmap < entry_idx + 1) + param.num_inactive_vdev_bitmap = entry_idx + 1; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, + "num_vdev_bitmap %d inactive_vdev_bitmap[0] = 0x%x, inactive_vdev_bitmap[1] = 0x%x", + param.num_inactive_vdev_bitmap, + param.inactive_vdev_bitmap[0], + param.inactive_vdev_bitmap[1]); + } + + if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM || + mode == WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM) { + param.num_link_entry = 1; + param.link_num[0].num_of_link = num_mlo_vdev - 1; + } + + return ath12k_wmi_send_mlo_link_set_active_cmd(ab, &param); +} + +static int ath12k_mac_mlo_sta_update_link_active(struct ath12k_base *ab, + struct ieee80211_hw *hw, + struct ath12k_vif *ahvif) +{ + u8 mlo_vdev_id_lst[IEEE80211_MLD_MAX_NUM_LINKS] = {0}; + u32 mlo_freq_list[IEEE80211_MLD_MAX_NUM_LINKS] = {0}; + unsigned long links = ahvif->links_map; + enum wmi_mlo_link_force_reason reason; + struct ieee80211_chanctx_conf *conf; + enum wmi_mlo_link_force_mode mode; + struct ieee80211_bss_conf *info; + struct ath12k_link_vif *arvif; + u8 num_mlo_vdev = 0; + u8 link_id; + +
for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + /* make sure vdev is created on this device */ + if (!arvif || !arvif->is_created || arvif->ar->ab != ab) + continue; + + info = ath12k_mac_get_link_bss_conf(arvif); + conf = wiphy_dereference(hw->wiphy, info->chanctx_conf); + mlo_freq_list[num_mlo_vdev] = conf->def.chan->center_freq; + + mlo_vdev_id_lst[num_mlo_vdev] = arvif->vdev_id; + num_mlo_vdev++; + } + + /* It is not allowed to activate more links than a single device + * supported. Something goes wrong if we reach here. + */ + if (num_mlo_vdev > ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) { + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* if 2 links are established and both link channels fall on the + * same hardware MAC, send command to firmware to deactivate one + * of them. + */ + if (num_mlo_vdev == 2 && + ath12k_mac_freqs_on_same_mac(ab, mlo_freq_list[0], + mlo_freq_list[1])) { + mode = WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM; + reason = WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT; + return ath12k_mac_mlo_sta_set_link_active(ab, reason, mode, + mlo_vdev_id_lst, num_mlo_vdev, + NULL, 0); + } + + return 0; +} + +static bool ath12k_mac_are_sbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return false; + + if (ath12k_is_2ghz_channel_freq(freq_1) || + ath12k_is_2ghz_channel_freq(freq_2)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_1, freq_2); +} + +static bool ath12k_mac_are_dbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_1, freq_2); +} + +static int ath12k_mac_select_links(struct ath12k_base *ab, + struct ieee80211_vif *vif, + struct ieee80211_hw *hw, + u16 *selected_links) +{ + unsigned long useful_links = ieee80211_vif_usable_links(vif); + struct ath12k_vif *ahvif = 
ath12k_vif_to_ahvif(vif); + u8 num_useful_links = hweight_long(useful_links); + struct ieee80211_chanctx_conf *chanctx; + struct ath12k_link_vif *assoc_arvif; + u32 assoc_link_freq, partner_freq; + u16 sbs_links = 0, dbs_links = 0; + struct ieee80211_bss_conf *info; + struct ieee80211_channel *chan; + struct ieee80211_sta *sta; + struct ath12k_sta *ahsta; + u8 link_id; + + /* activate all useful links if less than max supported */ + if (num_useful_links <= ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) { + *selected_links = useful_links; + return 0; + } + + /* only in station mode we can get here, so it's safe + * to use ap_addr + */ + rcu_read_lock(); + sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); + if (!sta) { + rcu_read_unlock(); + ath12k_warn(ab, "failed to find sta with addr %pM\n", vif->cfg.ap_addr); + return -EINVAL; + } + + ahsta = ath12k_sta_to_ahsta(sta); + assoc_arvif = wiphy_dereference(hw->wiphy, ahvif->link[ahsta->assoc_link_id]); + info = ath12k_mac_get_link_bss_conf(assoc_arvif); + chanctx = rcu_dereference(info->chanctx_conf); + assoc_link_freq = chanctx->def.chan->center_freq; + rcu_read_unlock(); + ath12k_dbg(ab, ATH12K_DBG_MAC, "assoc link %u freq %u\n", + assoc_arvif->link_id, assoc_link_freq); + + /* assoc link is already activated and has to be kept active, + * only need to select a partner link from others. 
+ */ + useful_links &= ~BIT(assoc_arvif->link_id); + for_each_set_bit(link_id, &useful_links, IEEE80211_MLD_MAX_NUM_LINKS) { + info = wiphy_dereference(hw->wiphy, vif->link_conf[link_id]); + if (!info) { + ath12k_warn(ab, "failed to get link info for link: %u\n", + link_id); + return -ENOLINK; + } + + chan = info->chanreq.oper.chan; + if (!chan) { + ath12k_warn(ab, "failed to get chan for link: %u\n", link_id); + return -EINVAL; + } + + partner_freq = chan->center_freq; + if (ath12k_mac_are_sbs_chan(ab, assoc_link_freq, partner_freq)) { + sbs_links |= BIT(link_id); + ath12k_dbg(ab, ATH12K_DBG_MAC, "new SBS link %u freq %u\n", + link_id, partner_freq); + continue; + } + + if (ath12k_mac_are_dbs_chan(ab, assoc_link_freq, partner_freq)) { + dbs_links |= BIT(link_id); + ath12k_dbg(ab, ATH12K_DBG_MAC, "new DBS link %u freq %u\n", + link_id, partner_freq); + continue; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, "non DBS/SBS link %u freq %u\n", + link_id, partner_freq); + } + + /* choose the first candidate no matter how many is in the list */ + if (sbs_links) + link_id = __ffs(sbs_links); + else if (dbs_links) + link_id = __ffs(dbs_links); + else + link_id = ffs(useful_links) - 1; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "select partner link %u\n", link_id); + + *selected_links = BIT(assoc_arvif->link_id) | BIT(link_id); + + return 0; +} + static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -5899,10 +6208,13 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct ath12k_base *prev_ab = NULL, *ab; struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; unsigned long valid_links; - u8 link_id = 0; + u16 selected_links = 0; + u8 link_id = 0, i; + struct ath12k *ar; int ret; lockdep_assert_wiphy(hw->wiphy); @@ -5972,8 +6284,24 @@ static int 
ath12k_mac_op_sta_state(struct ieee80211_hw *hw, * about to move to the associated state. */ if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && - old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) - ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif)); + old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) { + /* TODO: for now only do link selection for single device + * MLO case. Other cases would be handled in the future. + */ + ab = ah->radio[0].ab; + if (ab->ag->num_devices == 1) { + ret = ath12k_mac_select_links(ab, vif, hw, &selected_links); + if (ret) { + ath12k_warn(ab, + "failed to get selected links: %d\n", ret); + goto exit; + } + } else { + selected_links = ieee80211_vif_usable_links(vif); + } + + ieee80211_set_active_links(vif, selected_links); + } /* Handle all the other state transitions in generic way */ valid_links = ahsta->links_map; @@ -5997,6 +6325,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, } } + if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && + old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { + for_each_ar(ah, ar, i) { + ab = ar->ab; + if (prev_ab == ab) + continue; + + ret = ath12k_mac_mlo_sta_update_link_active(ab, hw, ahvif); + if (ret) { + ath12k_warn(ab, + "failed to update link active state on connect %d\n", + ret); + goto exit; + } + + prev_ab = ab; + } + } /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: * Remove the station from driver (handle ML sta here since that * needs special handling. 
Normal sta will be handled in generic diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index e6e74b45bfa4..cc81b1f5680f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -54,6 +54,8 @@ struct ath12k_generic_iter { #define ATH12K_DEFAULT_SCAN_LINK IEEE80211_MLD_MAX_NUM_LINKS #define ATH12K_NUM_MAX_LINKS (IEEE80211_MLD_MAX_NUM_LINKS + 1) +#define ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE 2 + enum ath12k_supported_bw { ATH12K_BW_20 = 0, ATH12K_BW_40 = 1, diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 60e2444fe08c..465f877fc0fb 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -91,6 +91,11 @@ struct ath12k_wmi_svc_rdy_ext2_parse { bool dma_ring_cap_done; bool spectral_bin_scaling_done; bool mac_phy_caps_ext_done; + bool hal_reg_caps_ext2_done; + bool scan_radio_caps_ext2_done; + bool twt_caps_done; + bool htt_msdu_idx_to_qtype_map_done; + bool dbs_or_sbs_cap_ext_done; }; struct ath12k_wmi_rdy_parse { @@ -4395,6 +4400,7 @@ static int ath12k_wmi_hw_mode_caps_parse(struct ath12k_base *soc, static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, u16 len, const void *ptr, void *data) { + struct ath12k_svc_ext_info *svc_ext_info = &soc->wmi_ab.svc_ext_info; struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext = data; const struct ath12k_wmi_hw_mode_cap_params *hw_mode_caps; enum wmi_host_hw_mode_config_type mode, pref; @@ -4427,8 +4433,11 @@ static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, } } - ath12k_dbg(soc, ATH12K_DBG_WMI, "preferred_hw_mode:%d\n", - soc->wmi_ab.preferred_hw_mode); + svc_ext_info->num_hw_modes = svc_rdy_ext->n_hw_mode_caps; + + ath12k_dbg(soc, ATH12K_DBG_WMI, "num hw modes %u preferred_hw_mode %d\n", + svc_ext_info->num_hw_modes, soc->wmi_ab.preferred_hw_mode); + if (soc->wmi_ab.preferred_hw_mode == WMI_HOST_HW_MODE_MAX) return -EINVAL; @@ -4658,6 +4667,65 @@ 
free_dir_buff: return ret; } +static void +ath12k_wmi_save_mac_phy_info(struct ath12k_base *ab, + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap, + struct ath12k_svc_ext_mac_phy_info *mac_phy_info) +{ + mac_phy_info->phy_id = __le32_to_cpu(mac_phy_cap->phy_id); + mac_phy_info->supported_bands = __le32_to_cpu(mac_phy_cap->supported_bands); + mac_phy_info->hw_freq_range.low_2ghz_freq = + __le32_to_cpu(mac_phy_cap->low_2ghz_chan_freq); + mac_phy_info->hw_freq_range.high_2ghz_freq = + __le32_to_cpu(mac_phy_cap->high_2ghz_chan_freq); + mac_phy_info->hw_freq_range.low_5ghz_freq = + __le32_to_cpu(mac_phy_cap->low_5ghz_chan_freq); + mac_phy_info->hw_freq_range.high_5ghz_freq = + __le32_to_cpu(mac_phy_cap->high_5ghz_chan_freq); +} + +static void +ath12k_wmi_save_all_mac_phy_info(struct ath12k_base *ab, + struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap; + const struct ath12k_wmi_hw_mode_cap_params *hw_mode_cap; + struct ath12k_svc_ext_mac_phy_info *mac_phy_info; + u32 hw_mode_id, phy_bit_map; + u8 hw_idx; + + mac_phy_info = &svc_ext_info->mac_phy_info[0]; + mac_phy_cap = svc_rdy_ext->mac_phy_caps; + + for (hw_idx = 0; hw_idx < svc_ext_info->num_hw_modes; hw_idx++) { + hw_mode_cap = &svc_rdy_ext->hw_mode_caps[hw_idx]; + hw_mode_id = __le32_to_cpu(hw_mode_cap->hw_mode_id); + phy_bit_map = __le32_to_cpu(hw_mode_cap->phy_id_map); + + while (phy_bit_map) { + ath12k_wmi_save_mac_phy_info(ab, mac_phy_cap, mac_phy_info); + mac_phy_info->hw_mode_config_type = + le32_get_bits(hw_mode_cap->hw_mode_config_type, + WMI_HW_MODE_CAP_CFG_TYPE); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_idx %u hw_mode_id %u hw_mode_config_type %u supported_bands %u phy_id %u 2 GHz [%u - %u] 5 GHz [%u - %u]\n", + hw_idx, hw_mode_id, + mac_phy_info->hw_mode_config_type, + mac_phy_info->supported_bands, mac_phy_info->phy_id, + mac_phy_info->hw_freq_range.low_2ghz_freq, + 
mac_phy_info->hw_freq_range.high_2ghz_freq, + mac_phy_info->hw_freq_range.low_5ghz_freq, + mac_phy_info->hw_freq_range.high_5ghz_freq); + + mac_phy_cap++; + mac_phy_info++; + + phy_bit_map >>= 1; + } + } +} + static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) @@ -4706,6 +4774,8 @@ static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, return ret; } + ath12k_wmi_save_all_mac_phy_info(ab, svc_rdy_ext); + svc_rdy_ext->mac_phy_done = true; } else if (!svc_rdy_ext->ext_hal_reg_done) { ret = ath12k_wmi_ext_hal_reg_caps(ab, len, ptr, svc_rdy_ext); @@ -4922,10 +4992,449 @@ static int ath12k_wmi_tlv_mac_phy_caps_ext(struct ath12k_base *ab, u16 tag, return 0; } +static void +ath12k_wmi_update_freq_info(struct ath12k_base *ab, + struct ath12k_svc_ext_mac_phy_info *mac_cap, + enum ath12k_hw_mode mode, + u32 phy_id) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + + mac_range = &hw_mode_info->freq_range_caps[mode][phy_id]; + + if (mac_cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { + mac_range->low_2ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_2ghz_freq, + ATH12K_MIN_2GHZ_FREQ); + mac_range->high_2ghz_freq = mac_cap->hw_freq_range.high_2ghz_freq ? + min_t(u32, + mac_cap->hw_freq_range.high_2ghz_freq, + ATH12K_MAX_2GHZ_FREQ) : + ATH12K_MAX_2GHZ_FREQ; + } + + if (mac_cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + mac_range->low_5ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_5ghz_freq, + ATH12K_MIN_5GHZ_FREQ); + mac_range->high_5ghz_freq = mac_cap->hw_freq_range.high_5ghz_freq ? 
+ min_t(u32, + mac_cap->hw_freq_range.high_5ghz_freq, + ATH12K_MAX_6GHZ_FREQ) : + ATH12K_MAX_6GHZ_FREQ; + } +} + +static bool +ath12k_wmi_all_phy_range_updated(struct ath12k_base *ab, + enum ath12k_hw_mode hwmode) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + mac_range = &hw_mode_info->freq_range_caps[hwmode][phy_id]; + /* modify SBS/DBS range only when both phy for DBS are filled */ + if (!mac_range->low_2ghz_freq && !mac_range->low_5ghz_freq) + return false; + } + + return true; +} + +static void ath12k_wmi_update_dbs_freq_info(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_DBS]; + /* Reset 5 GHz range for shared mac for DBS */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (mac_range[phy_id].low_2ghz_freq && + mac_range[phy_id].low_5ghz_freq) { + mac_range[phy_id].low_5ghz_freq = 0; + mac_range[phy_id].high_5ghz_freq = 0; + } + } +} + +static u32 +ath12k_wmi_get_highest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 highest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (range[phy_id].high_5ghz_freq > highest_freq) + highest_freq = range[phy_id].high_5ghz_freq; + } + + return highest_freq ? highest_freq : ATH12K_MAX_6GHZ_FREQ; +} + +static u32 +ath12k_wmi_get_lowest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 lowest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if ((!lowest_freq && range[phy_id].low_5ghz_freq) || + range[phy_id].low_5ghz_freq < lowest_freq) + lowest_freq = range[phy_id].low_5ghz_freq; + } + + return lowest_freq ? 
lowest_freq : ATH12K_MIN_5GHZ_FREQ; +} + +static void +ath12k_wmi_fill_upper_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *upper_sbs_freq_range; + u8 phy_id; + + upper_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + upper_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + upper_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (upper_sbs_freq_range[phy_id].low_2ghz_freq) { + upper_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + upper_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } else { + upper_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + upper_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } + } +} + +static void +ath12k_wmi_fill_lower_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *lower_sbs_freq_range; + u8 phy_id; + + lower_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + lower_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + lower_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (lower_sbs_freq_range[phy_id].low_2ghz_freq) { + lower_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + lower_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } else { + 
lower_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + lower_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } + } +} + +static const char *ath12k_wmi_hw_mode_to_str(enum ath12k_hw_mode hw_mode) +{ + static const char * const mode_str[] = { + [ATH12K_HW_MODE_SMM] = "SMM", + [ATH12K_HW_MODE_DBS] = "DBS", + [ATH12K_HW_MODE_SBS] = "SBS", + [ATH12K_HW_MODE_SBS_UPPER_SHARE] = "SBS_UPPER_SHARE", + [ATH12K_HW_MODE_SBS_LOWER_SHARE] = "SBS_LOWER_SHARE", + }; + + if (hw_mode >= ARRAY_SIZE(mode_str)) + return "Unknown"; + + return mode_str[hw_mode]; +} + +static void +ath12k_wmi_dump_freq_range_per_mac(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + enum ath12k_hw_mode hw_mode) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) + if (freq_range[i].low_2ghz_freq || freq_range[i].low_5ghz_freq) + ath12k_dbg(ab, ATH12K_DBG_WMI, + "frequency range: %s(%d) mac %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + ath12k_wmi_hw_mode_to_str(hw_mode), + hw_mode, i, + freq_range[i].low_2ghz_freq, + freq_range[i].high_2ghz_freq, + freq_range[i].low_5ghz_freq, + freq_range[i].high_5ghz_freq); +} + +static void ath12k_wmi_dump_freq_range(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + u8 i; + + for (i = ATH12K_HW_MODE_SMM; i < ATH12K_HW_MODE_MAX; i++) { + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[i]; + ath12k_wmi_dump_freq_range_per_mac(ab, freq_range, i); + } +} + +static int ath12k_wmi_modify_sbs_freq(struct ath12k_base *ab, u8 phy_id) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *sbs_mac_range, *shared_mac_range; + struct ath12k_hw_mode_freq_range_arg *non_shared_range; + u8 shared_phy_id; + + sbs_mac_range = &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][phy_id]; + + /* if SBS mac range has both 2.4 and 5 GHz ranges, i.e. 
shared phy_id + * keep the range as it is in SBS + */ + if (sbs_mac_range->low_2ghz_freq && sbs_mac_range->low_5ghz_freq) + return 0; + + if (sbs_mac_range->low_2ghz_freq && !sbs_mac_range->low_5ghz_freq) { + ath12k_err(ab, "Invalid DBS/SBS mode with only 2.4Ghz"); + ath12k_wmi_dump_freq_range_per_mac(ab, sbs_mac_range, ATH12K_HW_MODE_SBS); + return -EINVAL; + } + + non_shared_range = sbs_mac_range; + /* if SBS mac range has only 5 GHz then it's the non-shared phy, so + * modify the range as per the shared mac. + */ + shared_phy_id = phy_id ? 0 : 1; + shared_mac_range = + &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][shared_phy_id]; + + if (shared_mac_range->low_5ghz_freq > non_shared_range->low_5ghz_freq) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "high 5 GHz shared"); + /* If the shared mac lower 5 GHz frequency is greater than + * non-shared mac lower 5 GHz frequency then the shared mac has + * high 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz high + * freq should be less than the shared mac's low 5 GHz freq. + */ + if (non_shared_range->high_5ghz_freq >= + shared_mac_range->low_5ghz_freq) + non_shared_range->high_5ghz_freq = + max_t(u32, shared_mac_range->low_5ghz_freq - 10, + non_shared_range->low_5ghz_freq); + } else if (shared_mac_range->high_5ghz_freq < + non_shared_range->high_5ghz_freq) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "low 5 GHz shared"); + /* If the shared mac high 5 GHz frequency is less than + * non-shared mac high 5 GHz frequency then the shared mac has + * low 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz low + * freq should be greater than the shared mac's high 5 GHz freq. 
+ */ + if (shared_mac_range->high_5ghz_freq >= + non_shared_range->low_5ghz_freq) + non_shared_range->low_5ghz_freq = + min_t(u32, shared_mac_range->high_5ghz_freq + 10, + non_shared_range->high_5ghz_freq); + } else { + ath12k_warn(ab, "invalid SBS range with all 5 GHz shared"); + return -EINVAL; + } + + return 0; +} + +static void ath12k_wmi_update_sbs_freq_info(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u16 sbs_range_sep; + u8 phy_id; + int ret; + + mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS]; + + /* If sbs_lower_band_end_freq has a value, then the frequency range + * will be split using that value. + */ + sbs_range_sep = ab->wmi_ab.sbs_lower_band_end_freq; + if (sbs_range_sep) { + ath12k_wmi_fill_upper_share_sbs_freq(ab, sbs_range_sep, + mac_range); + ath12k_wmi_fill_lower_share_sbs_freq(ab, sbs_range_sep, + mac_range); + /* Hardware specifies the range boundary with sbs_range_sep, + * (i.e. the boundary between 5 GHz high and 5 GHz low), + * reset the original one to make sure it will not get used. + */ + memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS); + return; + } + + /* If sbs_lower_band_end_freq is not set that means firmware will send one + * shared mac range and one non-shared mac range. so update that freq. 
+ */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + ret = ath12k_wmi_modify_sbs_freq(ab, phy_id); + if (ret) { + memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS); + break; + } + } +} + +static void +ath12k_wmi_update_mac_freq_info(struct ath12k_base *ab, + enum wmi_host_hw_mode_config_type hw_config_type, + u32 phy_id, + struct ath12k_svc_ext_mac_phy_info *mac_cap) +{ + if (phy_id >= MAX_RADIOS) { + ath12k_err(ab, "mac more than two not supported: %d", phy_id); + return; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_mode_cfg %d mac %d band 0x%x SBS cutoff freq %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + hw_config_type, phy_id, mac_cap->supported_bands, + ab->wmi_ab.sbs_lower_band_end_freq, + mac_cap->hw_freq_range.low_2ghz_freq, + mac_cap->hw_freq_range.high_2ghz_freq, + mac_cap->hw_freq_range.low_5ghz_freq, + mac_cap->hw_freq_range.high_5ghz_freq); + + switch (hw_config_type) { + case WMI_HOST_HW_MODE_SINGLE: + if (phy_id) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "mac phy 1 is not supported"); + break; + } + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SMM, phy_id); + break; + + case WMI_HOST_HW_MODE_DBS: + if (!ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_freq_info(ab, mac_cap, + ATH12K_HW_MODE_DBS, phy_id); + break; + case WMI_HOST_HW_MODE_DBS_SBS: + case WMI_HOST_HW_MODE_DBS_OR_SBS: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_DBS, phy_id); + if (ab->wmi_ab.sbs_lower_band_end_freq || + mac_cap->hw_freq_range.low_5ghz_freq || + mac_cap->hw_freq_range.low_2ghz_freq) + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, + phy_id); + + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_dbs_freq_info(ab); + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + break; + case WMI_HOST_HW_MODE_SBS: + case WMI_HOST_HW_MODE_SBS_PASSIVE: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, phy_id); + if 
(ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + + break; + default: + break; + } +} + +static bool ath12k_wmi_sbs_range_present(struct ath12k_base *ab) +{ + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS) || + (ab->wmi_ab.sbs_lower_band_end_freq && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_LOWER_SHARE) && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_UPPER_SHARE))) + return true; + + return false; +} + +static int ath12k_wmi_update_hw_mode_list(struct ath12k_base *ab) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + enum wmi_host_hw_mode_config_type hw_config_type; + struct ath12k_svc_ext_mac_phy_info *tmp; + bool dbs_mode = false, sbs_mode = false; + u32 i, j = 0; + + if (!svc_ext_info->num_hw_modes) { + ath12k_err(ab, "invalid number of hw modes"); + return -EINVAL; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "updated HW mode list: num modes %d", + svc_ext_info->num_hw_modes); + + memset(info->freq_range_caps, 0, sizeof(info->freq_range_caps)); + + for (i = 0; i < svc_ext_info->num_hw_modes; i++) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + + /* Update for MAC0 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + hw_config_type = tmp->hw_mode_config_type; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, tmp->phy_id, tmp); + + /* SBS and DBS have dual MAC. Up to 2 MACs are considered. 
*/ + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + /* Update for MAC1 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, + tmp->phy_id, tmp); + + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) + dbs_mode = true; + + if (ath12k_wmi_sbs_range_present(ab) && + (hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS)) + sbs_mode = true; + } + } + + info->support_dbs = dbs_mode; + info->support_sbs = sbs_mode; + + ath12k_wmi_dump_freq_range(ab); + + return 0; +} + static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) { + const struct ath12k_wmi_dbs_or_sbs_cap_params *dbs_or_sbs_caps; struct ath12k_wmi_pdev *wmi_handle = &ab->wmi_ab.wmi[0]; struct ath12k_wmi_svc_rdy_ext2_parse *parse = data; int ret; @@ -4967,7 +5476,32 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, } parse->mac_phy_caps_ext_done = true; + } else if (!parse->hal_reg_caps_ext2_done) { + parse->hal_reg_caps_ext2_done = true; + } else if (!parse->scan_radio_caps_ext2_done) { + parse->scan_radio_caps_ext2_done = true; + } else if (!parse->twt_caps_done) { + parse->twt_caps_done = true; + } else if (!parse->htt_msdu_idx_to_qtype_map_done) { + parse->htt_msdu_idx_to_qtype_map_done = true; + } else if (!parse->dbs_or_sbs_cap_ext_done) { + dbs_or_sbs_caps = ptr; + ab->wmi_ab.sbs_lower_band_end_freq = + __le32_to_cpu(dbs_or_sbs_caps->sbs_lower_band_end_freq); + + ath12k_dbg(ab, ATH12K_DBG_WMI, "sbs_lower_band_end_freq %u\n", + ab->wmi_ab.sbs_lower_band_end_freq); + + ret = ath12k_wmi_update_hw_mode_list(ab); + if (ret) { + ath12k_warn(ab, "failed to update 
hw mode list: %d\n", + ret); + return ret; + } + + parse->dbs_or_sbs_cap_ext_done = true; } + break; default: break; @@ -7626,6 +8160,64 @@ static int ath12k_wmi_pull_fw_stats(struct ath12k_base *ab, struct sk_buff *skb, &parse); } +static void ath12k_wmi_fw_stats_process(struct ath12k *ar, + struct ath12k_fw_stats *stats) +{ + struct ath12k_base *ab = ar->ab; + struct ath12k_pdev *pdev; + bool is_end = true; + size_t total_vdevs_started = 0; + int i; + + if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats->vdevs)) { + ath12k_warn(ab, "empty vdev stats"); + return; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + rcu_read_lock(); + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += pdev->ar->num_started_vdevs; + } + rcu_read_unlock(); + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats->vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + return; + } + + if (stats->stats_id == WMI_REQUEST_BCN_STAT) { + if (list_empty(&stats->bcn)) { + ath12k_warn(ab, "empty beacon stats"); + return; + } + /* Mark end until we reached the count of all started VDEVs + * within the PDEV + */ + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); + + list_splice_tail_init(&stats->bcn, + &ar->fw_stats.bcn); + + if (is_end) + complete(&ar->fw_stats_done); + } +} + static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *skb) { struct ath12k_fw_stats stats = {}; @@ -7655,19 +8247,15 @@ static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via - * debugfs fw stats. Therefore, processing it separately. 
- */ + /* Handle WMI_REQUEST_PDEV_STAT status update */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats.fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } - /* WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT are currently requested only - * via debugfs fw stats. Hence, processing these in debugfs context. - */ - ath12k_debugfs_fw_stats_process(ar, &stats); + /* Handle WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT updates. */ + ath12k_wmi_fw_stats_process(ar, &stats); complete: complete(&ar->fw_stats_complete); @@ -9911,3 +10499,224 @@ int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar, return 0; } + +static int +ath12k_wmi_fill_disallowed_bmap(struct ath12k_base *ab, + struct wmi_disallowed_mlo_mode_bitmap_params *dislw_bmap, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_ml_disallow_mode_bmap_arg *dislw_bmap_arg; + u8 i; + + if (arg->num_disallow_mode_comb > + ARRAY_SIZE(arg->disallow_bmap)) { + ath12k_warn(ab, "invalid num_disallow_mode_comb: %d", + arg->num_disallow_mode_comb); + return -EINVAL; + } + + dislw_bmap_arg = &arg->disallow_bmap[0]; + for (i = 0; i < arg->num_disallow_mode_comb; i++) { + dislw_bmap->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*dislw_bmap)); + dislw_bmap->disallowed_mode_bitmap = + cpu_to_le32(dislw_bmap_arg->disallowed_mode); + dislw_bmap->ieee_link_id_comb = + le32_encode_bits(dislw_bmap_arg->ieee_link_id[0], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[1], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[2], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[3], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4); + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d disallowed_mode %d ieee_link_id_comb 0x%x", + i, dislw_bmap_arg->disallowed_mode, + dislw_bmap_arg->ieee_link_id_comb); 
+ dislw_bmap++; + dislw_bmap_arg++; + } + + return 0; +} + +int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_disallowed_mlo_mode_bitmap_params *disallowed_mode_bmap; + struct wmi_mlo_set_active_link_number_params *link_num_param; + u32 num_link_num_param = 0, num_vdev_bitmap = 0; + struct ath12k_wmi_base *wmi_ab = &ab->wmi_ab; + struct wmi_mlo_link_set_active_cmd *cmd; + u32 num_inactive_vdev_bitmap = 0; + u32 num_disallow_mode_comb = 0; + struct wmi_tlv *tlv; + struct sk_buff *skb; + __le32 *vdev_bitmap; + void *buf_ptr; + int i, ret; + u32 len; + + if (!arg->num_vdev_bitmap && !arg->num_link_entry) { + ath12k_warn(ab, "Invalid num_vdev_bitmap and num_link_entry"); + return -EINVAL; + } + + switch (arg->force_mode) { + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM: + num_link_num_param = arg->num_link_entry; + fallthrough; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE: + case WMI_MLO_LINK_FORCE_MODE_NO_FORCE: + num_vdev_bitmap = arg->num_vdev_bitmap; + break; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE: + num_vdev_bitmap = arg->num_vdev_bitmap; + num_inactive_vdev_bitmap = arg->num_inactive_vdev_bitmap; + break; + default: + ath12k_warn(ab, "Invalid force mode: %u", arg->force_mode); + return -EINVAL; + } + + num_disallow_mode_comb = arg->num_disallow_mode_comb; + len = sizeof(*cmd) + + TLV_HDR_SIZE + sizeof(*link_num_param) * num_link_num_param + + TLV_HDR_SIZE + sizeof(*vdev_bitmap) * num_vdev_bitmap + + TLV_HDR_SIZE + TLV_HDR_SIZE + TLV_HDR_SIZE + + TLV_HDR_SIZE + sizeof(*disallowed_mode_bmap) * num_disallow_mode_comb; + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) + len += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + + skb = ath12k_wmi_alloc_skb(wmi_ab, len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_link_set_active_cmd *)skb->data; + cmd->tlv_header = 
ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_LINK_SET_ACTIVE_CMD, + sizeof(*cmd)); + cmd->force_mode = cpu_to_le32(arg->force_mode); + cmd->reason = cpu_to_le32(arg->reason); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "mode %d reason %d num_link_num_param %d num_vdev_bitmap %d inactive %d num_disallow_mode_comb %d", + arg->force_mode, arg->reason, num_link_num_param, + num_vdev_bitmap, num_inactive_vdev_bitmap, + num_disallow_mode_comb); + + buf_ptr = skb->data + sizeof(*cmd); + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*link_num_param) * num_link_num_param); + buf_ptr += TLV_HDR_SIZE; + + if (num_link_num_param) { + cmd->ctrl_flags = + le32_encode_bits(arg->ctrl_flags.dync_force_link_num ? 1 : 0, + CRTL_F_DYNC_FORCE_LINK_NUM); + + link_num_param = buf_ptr; + for (i = 0; i < num_link_num_param; i++) { + link_num_param->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*link_num_param)); + link_num_param->num_of_link = + cpu_to_le32(arg->link_num[i].num_of_link); + link_num_param->vdev_type = + cpu_to_le32(arg->link_num[i].vdev_type); + link_num_param->vdev_subtype = + cpu_to_le32(arg->link_num[i].vdev_subtype); + link_num_param->home_freq = + cpu_to_le32(arg->link_num[i].home_freq); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d num_of_link %d vdev type %d subtype %d freq %d control_flags %d", + i, arg->link_num[i].num_of_link, + arg->link_num[i].vdev_type, + arg->link_num[i].vdev_subtype, + arg->link_num[i].home_freq, + __le32_to_cpu(cmd->ctrl_flags)); + link_num_param++; + } + + buf_ptr += sizeof(*link_num_param) * num_link_num_param; + } + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * num_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_vdev_bitmap; i++) { + vdev_bitmap[i] = cpu_to_le32(arg->vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, "entry %d vdev_id_bitmap 0x%x", + i, arg->vdev_bitmap[i]); + } + + buf_ptr += 
sizeof(*vdev_bitmap) * num_vdev_bitmap; + } + + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) { + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * + num_inactive_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_inactive_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_inactive_vdev_bitmap; i++) { + vdev_bitmap[i] = + cpu_to_le32(arg->inactive_vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d inactive_vdev_id_bitmap 0x%x", + i, arg->inactive_vdev_bitmap[i]); + } + + buf_ptr += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + } + } else { + /* add empty vdev bitmap2 tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + } + + /* add empty ieee_link_id_bitmap tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + /* add empty ieee_link_id_bitmap2 tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*disallowed_mode_bmap) * + arg->num_disallow_mode_comb); + buf_ptr += TLV_HDR_SIZE; + + ret = ath12k_wmi_fill_disallowed_bmap(ab, buf_ptr, arg); + if (ret) + goto free_skb; + + ret = ath12k_wmi_cmd_send(&wmi_ab->wmi[0], skb, WMI_MLO_LINK_SET_ACTIVE_CMDID); + if (ret) { + ath12k_warn(ab, + "failed to send WMI_MLO_LINK_SET_ACTIVE_CMDID: %d\n", ret); + goto free_skb; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "WMI mlo link set active cmd"); + + return ret; + +free_skb: + dev_kfree_skb(skb); + return ret; +} diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index ac18f75e0449..c640ffa180c8 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1974,6 +1974,7 @@ enum wmi_tlv_tag { WMI_TAG_TPC_STATS_CTL_PWR_TABLE_EVENT, 
WMI_TAG_VDEV_SET_TPC_POWER_CMD = 0x3B5, WMI_TAG_VDEV_CH_POWER_INFO, + WMI_TAG_MLO_LINK_SET_ACTIVE_CMD = 0x3BE, WMI_TAG_EHT_RATE_SET = 0x3C4, WMI_TAG_DCS_AWGN_INT_TYPE = 0x3C5, WMI_TAG_MLO_TX_SEND_PARAMS, @@ -2617,6 +2618,8 @@ struct ath12k_wmi_soc_mac_phy_hw_mode_caps_params { __le32 num_chainmask_tables; } __packed; +#define WMI_HW_MODE_CAP_CFG_TYPE GENMASK(27, 0) + struct ath12k_wmi_hw_mode_cap_params { __le32 tlv_header; __le32 hw_mode_id; @@ -2666,6 +2669,12 @@ struct ath12k_wmi_mac_phy_caps_params { __le32 he_cap_info_2g_ext; __le32 he_cap_info_5g_ext; __le32 he_cap_info_internal; + __le32 wireless_modes; + __le32 low_2ghz_chan_freq; + __le32 high_2ghz_chan_freq; + __le32 low_5ghz_chan_freq; + __le32 high_5ghz_chan_freq; + __le32 nss_ratio; } __packed; struct ath12k_wmi_hal_reg_caps_ext_params { @@ -2739,6 +2748,11 @@ struct wmi_service_ready_ext2_event { __le32 default_num_msduq_supported_per_tid; } __packed; +struct ath12k_wmi_dbs_or_sbs_cap_params { + __le32 hw_mode_id; + __le32 sbs_lower_band_end_freq; +} __packed; + struct ath12k_wmi_caps_ext_params { __le32 hw_mode_id; __le32 pdev_and_hw_link_ids; @@ -5049,6 +5063,53 @@ struct ath12k_wmi_pdev { u32 rx_decap_mode; }; +struct ath12k_hw_mode_freq_range_arg { + u32 low_2ghz_freq; + u32 high_2ghz_freq; + u32 low_5ghz_freq; + u32 high_5ghz_freq; +}; + +struct ath12k_svc_ext_mac_phy_info { + enum wmi_host_hw_mode_config_type hw_mode_config_type; + u32 phy_id; + u32 supported_bands; + struct ath12k_hw_mode_freq_range_arg hw_freq_range; +}; + +#define ATH12K_MAX_MAC_PHY_CAP 8 + +struct ath12k_svc_ext_info { + u32 num_hw_modes; + struct ath12k_svc_ext_mac_phy_info mac_phy_info[ATH12K_MAX_MAC_PHY_CAP]; +}; + +/** + * enum ath12k_hw_mode - enum for host mode + * @ATH12K_HW_MODE_SMM: Single mac mode + * @ATH12K_HW_MODE_DBS: DBS mode + * @ATH12K_HW_MODE_SBS: SBS mode with either high share or low share + * @ATH12K_HW_MODE_SBS_UPPER_SHARE: Higher 5 GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_SBS_LOWER_SHARE: Lower 5 
GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_MAX: Max, used to indicate invalid mode + */ +enum ath12k_hw_mode { + ATH12K_HW_MODE_SMM, + ATH12K_HW_MODE_DBS, + ATH12K_HW_MODE_SBS, + ATH12K_HW_MODE_SBS_UPPER_SHARE, + ATH12K_HW_MODE_SBS_LOWER_SHARE, + ATH12K_HW_MODE_MAX, +}; + +struct ath12k_hw_mode_info { + bool support_dbs:1; + bool support_sbs:1; + + struct ath12k_hw_mode_freq_range_arg freq_range_caps[ATH12K_HW_MODE_MAX] + [MAX_RADIOS]; +}; + struct ath12k_wmi_base { struct ath12k_base *ab; struct ath12k_wmi_pdev wmi[MAX_RADIOS]; @@ -5066,6 +5127,10 @@ struct ath12k_wmi_base { enum wmi_host_hw_mode_config_type preferred_hw_mode; struct ath12k_wmi_target_cap_arg *targ_cap; + + struct ath12k_svc_ext_info svc_ext_info; + u32 sbs_lower_band_end_freq; + struct ath12k_hw_mode_info hw_mode_info; }; struct wmi_pdev_set_bios_interface_cmd { @@ -5997,6 +6062,118 @@ struct wmi_vdev_set_tpc_power_cmd { */ } __packed; +#define CRTL_F_DYNC_FORCE_LINK_NUM GENMASK(3, 2) + +struct wmi_mlo_link_set_active_cmd { + __le32 tlv_header; + __le32 force_mode; + __le32 reason; + __le32 use_ieee_link_id_bitmap; + struct ath12k_wmi_mac_addr_params ap_mld_mac_addr; + __le32 ctrl_flags; +} __packed; + +struct wmi_mlo_set_active_link_number_params { + __le32 tlv_header; + __le32 num_of_link; + __le32 vdev_type; + __le32 vdev_subtype; + __le32 home_freq; +} __packed; + +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1 GENMASK(7, 0) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2 GENMASK(15, 8) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3 GENMASK(23, 16) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4 GENMASK(31, 24) + +struct wmi_disallowed_mlo_mode_bitmap_params { + __le32 tlv_header; + __le32 disallowed_mode_bitmap; + __le32 ieee_link_id_comb; +} __packed; + +enum wmi_mlo_link_force_mode { + WMI_MLO_LINK_FORCE_MODE_ACTIVE = 1, + WMI_MLO_LINK_FORCE_MODE_INACTIVE = 2, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM = 3, + WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM = 4, + 
WMI_MLO_LINK_FORCE_MODE_NO_FORCE = 5, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE = 6, + WMI_MLO_LINK_FORCE_MODE_NON_FORCE_UPDATE = 7, +}; + +enum wmi_mlo_link_force_reason { + WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT = 1, + WMI_MLO_LINK_FORCE_REASON_NEW_DISCONNECT = 2, + WMI_MLO_LINK_FORCE_REASON_LINK_REMOVAL = 3, + WMI_MLO_LINK_FORCE_REASON_TDLS = 4, + WMI_MLO_LINK_FORCE_REASON_REVERT_FAILURE = 5, + WMI_MLO_LINK_FORCE_REASON_LINK_DELETE = 6, + WMI_MLO_LINK_FORCE_REASON_SINGLE_LINK_EMLSR_OP = 7, +}; + +struct wmi_mlo_link_num_arg { + u32 num_of_link; + u32 vdev_type; + u32 vdev_subtype; + u32 home_freq; +}; + +struct wmi_mlo_control_flags_arg { + bool overwrite_force_active_bitmap; + bool overwrite_force_inactive_bitmap; + bool dync_force_link_num; + bool post_re_evaluate; + u8 post_re_evaluate_loops; + bool dont_reschedule_workqueue; +}; + +struct wmi_ml_link_force_cmd_arg { + u8 ap_mld_mac_addr[ETH_ALEN]; + u16 ieee_link_id_bitmap; + u16 ieee_link_id_bitmap2; + u8 link_num; +}; + +struct wmi_ml_disallow_mode_bmap_arg { + u32 disallowed_mode; + union { + u32 ieee_link_id_comb; + u8 ieee_link_id[4]; + }; +}; + +/* maximum size of link number param array + * for MLO link set active command + */ +#define WMI_MLO_LINK_NUM_SZ 2 + +/* maximum size of vdev bitmap array for + * MLO link set active command + */ +#define WMI_MLO_VDEV_BITMAP_SZ 2 + +/* Max number of disallowed bitmap combination + * sent to firmware + */ +#define WMI_ML_MAX_DISALLOW_BMAP_COMB 4 + +struct wmi_mlo_link_set_active_arg { + enum wmi_mlo_link_force_mode force_mode; + enum wmi_mlo_link_force_reason reason; + u32 num_link_entry; + u32 num_vdev_bitmap; + u32 num_inactive_vdev_bitmap; + struct wmi_mlo_link_num_arg link_num[WMI_MLO_LINK_NUM_SZ]; + u32 vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ]; + u32 inactive_vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ]; + struct wmi_mlo_control_flags_arg ctrl_flags; + bool use_ieee_link_id; + struct wmi_ml_link_force_cmd_arg force_cmd; + u32 num_disallow_mode_comb; + struct 
wmi_ml_disallow_mode_bmap_arg disallow_bmap[WMI_ML_MAX_DISALLOW_BMAP_COMB]; +}; + void ath12k_wmi_init_qcn9274(struct ath12k_base *ab, struct ath12k_wmi_resource_config_arg *config); void ath12k_wmi_init_wcn7850(struct ath12k_base *ab, @@ -6195,5 +6372,6 @@ bool ath12k_wmi_supports_6ghz_cc_ext(struct ath12k *ar); int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar, u32 vdev_id, struct ath12k_reg_tpc_power_info *param); - +int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab, + struct wmi_mlo_link_set_active_arg *param); #endif diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c index af98e871199d..5a9e93fd1ef4 100644 --- a/drivers/net/wireless/ath/ath6kl/bmi.c +++ b/drivers/net/wireless/ath/ath6kl/bmi.c @@ -87,7 +87,9 @@ int ath6kl_bmi_get_target_info(struct ath6kl *ar, * We need to do some backwards compatibility to make this work. */ if (le32_to_cpu(targ_info->byte_count) != sizeof(*targ_info)) { - WARN_ON(1); + ath6kl_err("mismatched byte count %d vs. expected %zd\n", + le32_to_cpu(targ_info->byte_count), + sizeof(*targ_info)); return -EINVAL; } diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index a3e03580cd9f..564ca6a61985 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -438,14 +438,21 @@ static void carl9170_usb_rx_complete(struct urb *urb) if (atomic_read(&ar->rx_anch_urbs) == 0) { /* - * The system is too slow to cope with - * the enormous workload. We have simply - * run out of active rx urbs and this - * unfortunately leads to an unpredictable - * device. 
+ * At this point, either the system is too slow to + * cope with the enormous workload (so we have simply + * run out of active rx urbs and this unfortunately + * leads to an unpredictable device), or the device + * is not fully functional after an unsuccessful + * firmware loading attempts (so it doesn't pass + * ieee80211_register_hw() and there is no internal + * workqueue at all). */ - ieee80211_queue_work(ar->hw, &ar->ping_work); + if (ar->registered) + ieee80211_queue_work(ar->hw, &ar->ping_work); + else + pr_warn_once("device %s is not registered\n", + dev_name(&ar->udev->dev)); } } else { /* diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 0e5130d1fccd..031d88bf6393 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -203,7 +203,8 @@ il4965_rs_extract_rate(u32 rate_n_flags) return (u8) (rate_n_flags & 0xFF); } -static void +/* noinline works around https://github.com/llvm/llvm-project/issues/143908 */ +static noinline_for_stack void il4965_rs_rate_scale_clear_win(struct il_rate_scale_data *win) { win->data = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index dbfd45948e8b..66211426aa3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1316,6 +1316,7 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, sizeof(trans->conf.no_reclaim_cmds)); memcpy(trans->conf.no_reclaim_cmds, no_reclaim_cmds, sizeof(no_reclaim_cmds)); + trans->conf.n_no_reclaim_cmds = ARRAY_SIZE(no_reclaim_cmds); switch (iwlwifi_mod_params.amsdu_size) { case IWL_AMSDU_DEF: diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index e8820e7cf8fa..1774bb84dd3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ 
-77,6 +77,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c index 81ca9ff67be9..3f8b840871d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -32,9 +32,9 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, unsigned int link_id; int cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, WIDE_ID(MAC_CONF_GROUP, - MAC_CONFIG_CMD), 0); + MAC_CONFIG_CMD), 1); - if (WARN_ON(cmd_ver < 1 && cmd_ver > 3)) + if (WARN_ON(cmd_ver > 3)) return; cmd->id_and_color = cpu_to_le32(mvmvif->id); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index cb36baac14da..4f2be0c1bd97 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -166,7 +166,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_context_info *ctxt_info; struct iwl_context_info_rbd_cfg *rx_cfg; - u32 control_flags = 0, rb_size; + u32 control_flags = 0, rb_size, cb_size; dma_addr_t phys; int ret; @@ -202,11 +202,12 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, rb_size = IWL_CTXT_INFO_RB_SIZE_4K; } - WARN_ON(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)) > 12); + cb_size = RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)); + if (WARN_ON(cb_size > 12)) + cb_size = 12; + control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG; - control_flags |= - u32_encode_bits(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)), - IWL_CTXT_INFO_RB_CB_SIZE); + control_flags |= u32_encode_bits(cb_size, IWL_CTXT_INFO_RB_CB_SIZE); 
control_flags |= u32_encode_bits(rb_size, IWL_CTXT_INFO_RB_SIZE); ctxt_info->control.control_flags = cpu_to_le32(control_flags); diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 4c5b1de0e936..6882e90e90b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -459,7 +459,9 @@ mwifiex_process_mgmt_packet(struct mwifiex_private *priv, "auth: receive authentication from %pM\n", ieee_hdr->addr3); } else { - if (!priv->wdev.connected) + if (!priv->wdev.connected || + !ether_addr_equal(ieee_hdr->addr3, + priv->curr_bss_params.bss_descriptor.mac_address)) return 0; if (ieee80211_is_deauth(ieee_hdr->frame_control)) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5f8d81cda6cd..74b75035d361 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1224,6 +1224,16 @@ static inline int mt76_wed_dma_setup(struct mt76_dev *dev, struct mt76_queue *q, #define mt76_dereference(p, dev) \ rcu_dereference_protected(p, lockdep_is_held(&(dev)->mutex)) +static inline struct mt76_wcid * +__mt76_wcid_ptr(struct mt76_dev *dev, u16 idx) +{ + if (idx >= ARRAY_SIZE(dev->wcid)) + return NULL; + return rcu_dereference(dev->wcid[idx]); +} + +#define mt76_wcid_ptr(dev, idx) __mt76_wcid_ptr(&(dev)->mt76, idx) + struct mt76_dev *mt76_alloc_device(struct device *pdev, unsigned int size, const struct ieee80211_ops *ops, const struct mt76_driver_ops *drv_ops); diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index 863e5770df51..e26cc78fff94 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -44,7 +44,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (idx >= MT7603_WTBL_STA - 1) goto free; - wcid = 
rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) goto free; diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 413973d05b43..6387f9e61060 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -487,10 +487,7 @@ mt7603_rx_get_wcid(struct mt7603_dev *dev, u8 idx, bool unicast) struct mt7603_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7603_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1266,12 +1263,9 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data) if (pid == MT_PACKET_ID_NO_ACK) return; - if (wcidx >= MT7603_WTBL_SIZE) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 3ca4fae7c4b0..f8d2cc94b742 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -90,10 +90,7 @@ static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev, struct mt7615_sta *sta; struct mt76_wcid *wcid; - if (idx >= MT7615_WTBL_SIZE) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -1504,7 +1501,7 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index e9ac8a7317a1..0db00efe88b0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ 
-1172,7 +1172,7 @@ void mt76_connac2_txwi_free(struct mt76_dev *dev, struct mt76_txwi_cache *t, wcid_idx = wcid->idx; } else { wcid_idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(dev->wcid[wcid_idx]); + wcid = __mt76_wcid_ptr(dev, wcid_idx); if (wcid && wcid->sta) { sta = container_of((void *)wcid, struct ieee80211_sta, diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index cb13d0a76878..16db0f2082d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -287,7 +287,7 @@ __mt76_connac_mcu_alloc_sta_req(struct mt76_dev *dev, struct mt76_vif_link *mvif mt76_connac_mcu_get_wlan_idx(dev, wcid, &hdr.wlan_idx_lo, &hdr.wlan_idx_hi); - skb = mt76_mcu_msg_alloc(dev, NULL, len); + skb = __mt76_mcu_msg_alloc(dev, NULL, len, len, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -1740,8 +1740,8 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (!sreq->ssids[i].ssid_len) continue; - req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid, + req->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(req->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 4cd63bacd742..9d7ee09b6cc9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -262,10 +262,7 @@ mt76x02_rx_get_sta(struct mt76_dev *dev, u8 idx) { struct mt76_wcid *wcid; - if (idx >= MT76x02_N_WCIDS) - return NULL; - - wcid = rcu_dereference(dev->wcid[idx]); + wcid = __mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 
d5db6ffd6d36..83488b2d6efb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -564,9 +564,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev, rcu_read_lock(); - if (stat->wcid < MT76x02_N_WCIDS) - wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]); - + wcid = mt76_wcid_ptr(dev, stat->wcid); if (wcid && wcid->sta) { void *priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 9400e4af2a04..6639976afcee 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -56,10 +56,7 @@ static struct mt76_wcid *mt7915_rx_get_wcid(struct mt7915_dev *dev, struct mt7915_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; @@ -917,7 +914,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1013,12 +1010,9 @@ static void mt7915_mac_add_txs(struct mt7915_dev *dev, void *data) if (pid < MT_PACKET_ID_WED) return; - if (wcidx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 427542777abc..c6584d2b7509 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -3986,7 +3986,7 @@ int mt7915_mcu_wed_wa_tx_stats(struct mt7915_dev *dev, u16 wlan_idx) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if 
(wcid) wcid->stats.tx_packets += le32_to_cpu(res->tx_packets); else diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 9c4d5cea0c42..4a82f8e4c118 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -587,12 +587,9 @@ static void mt7915_mmio_wed_update_rx_stats(struct mtk_wed_device *wed, dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed); - if (idx >= mt7915_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (wcid) { wcid->stats.rx_bytes += le32_to_cpu(stats->rx_byte_cnt); wcid->stats.rx_packets += le32_to_cpu(stats->rx_pkt_cnt); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index 5dd57de59f27..f1f76506b0a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -465,7 +465,7 @@ void mt7921_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -516,7 +516,7 @@ static void mt7921_mac_tx_free(struct mt792x_dev *dev, void *data, int len) count++; idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -816,7 +816,7 @@ void mt7921_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 1fffa43379b2..77f73ae1d7ec 100644 --- 
a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1180,6 +1180,9 @@ static void mt7921_sta_set_decap_offload(struct ieee80211_hw *hw, struct mt792x_sta *msta = (struct mt792x_sta *)sta->drv_priv; struct mt792x_dev *dev = mt792x_hw_dev(hw); + if (!msta->deflink.wcid.sta) + return; + mt792x_mutex_acquire(dev); if (enabled) diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 2a83ff59a968..4249bad83c93 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -52,6 +52,8 @@ static int mt7925_thermal_init(struct mt792x_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7925_%s", wiphy_name(wiphy)); + if (!name) + return -ENOMEM; hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy, mt7925_hwmon_groups); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c index c871d2f9688b..75823c9fd3a1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mac.c @@ -1040,7 +1040,7 @@ void mt7925_mac_add_txs(struct mt792x_dev *dev, void *data) rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -1122,7 +1122,7 @@ mt7925_mac_tx_free(struct mt792x_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) continue; @@ -1445,7 +1445,7 @@ void mt7925_usb_sdio_tx_complete_skb(struct mt76_dev *mdev, u16 idx; idx = le32_get_bits(txwi[1], MT_TXD1_WLAN_IDX); - wcid = rcu_dereference(mdev->wcid[idx]); + wcid = __mt76_wcid_ptr(mdev, idx); sta = wcid_to_sta(wcid); if (sta && likely(e->skb->protocol != cpu_to_be16(ETH_P_PAE))) diff --git 
a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 94b0099dcd41..5b001548dffc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1481,7 +1481,7 @@ mt7925_start_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif, mt792x_mutex_acquire(dev); - err = mt7925_mcu_sched_scan_req(mphy, vif, req); + err = mt7925_mcu_sched_scan_req(mphy, vif, req, ies); if (err < 0) goto out; @@ -1603,6 +1603,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, unsigned long valid = mvif->valid_links; u8 i; + if (!msta->vif) + return; + mt792x_mutex_acquire(dev); valid = ieee80211_vif_is_mld(vif) ? mvif->valid_links : BIT(0); @@ -1617,6 +1620,9 @@ static void mt7925_sta_set_decap_offload(struct ieee80211_hw *hw, else clear_bit(MT_WCID_FLAG_HDR_TRANS, &mlink->wcid.flags); + if (!mlink->wcid.sta) + continue; + mt7925_mcu_wtbl_update_hdr_trans(dev, vif, sta, i); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index b8542be0d945..8ac6fbb736ab 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -164,6 +164,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, bool suspend, struct cfg80211_wowlan *wowlan) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; + struct ieee80211_scan_ies ies = {}; struct mt76_dev *dev = phy->dev; struct { struct { @@ -194,7 +195,7 @@ mt7925_connac_mcu_set_wow_ctrl(struct mt76_phy *phy, struct ieee80211_vif *vif, req.wow_ctrl_tlv.trigger |= (UNI_WOW_DETECT_TYPE_DISCONNECT | UNI_WOW_DETECT_TYPE_BCN_LOST); if (wowlan->nd_config) { - mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config); + mt7925_mcu_sched_scan_req(phy, vif, wowlan->nd_config, &ies); req.wow_ctrl_tlv.trigger |= UNI_WOW_DETECT_TYPE_SCH_SCAN_HIT; 
mt7925_mcu_sched_scan_enable(phy, vif, suspend); } @@ -2818,6 +2819,54 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) return err; } +static void +mt7925_mcu_build_scan_ie_tlv(struct mt76_dev *mdev, + struct sk_buff *skb, + struct ieee80211_scan_ies *scan_ies) +{ + u32 max_len = sizeof(struct scan_ie_tlv) + MT76_CONNAC_SCAN_IE_LEN; + struct scan_ie_tlv *ie; + enum nl80211_band i; + struct tlv *tlv; + const u8 *ies; + u16 ies_len; + + for (i = 0; i <= NL80211_BAND_6GHZ; i++) { + if (i == NL80211_BAND_60GHZ) + continue; + + ies = scan_ies->ies[i]; + ies_len = scan_ies->len[i]; + + if (!ies || !ies_len) + continue; + + if (ies_len > max_len) + return; + + tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, + sizeof(*ie) + ies_len); + ie = (struct scan_ie_tlv *)tlv; + + memcpy(ie->ies, ies, ies_len); + ie->ies_len = cpu_to_le16(ies_len); + + switch (i) { + case NL80211_BAND_2GHZ: + ie->band = 1; + break; + case NL80211_BAND_6GHZ: + ie->band = 3; + break; + default: + ie->band = 2; + break; + } + + max_len -= (sizeof(*ie) + ies_len); + } +} + int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, struct ieee80211_scan_request *scan_req) { @@ -2843,7 +2892,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, max_len = sizeof(*hdr) + sizeof(*req) + sizeof(*ssid) + sizeof(*bssid) * MT7925_RNR_SCAN_MAX_BSSIDS + - sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie); + sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie) + + MT76_CONNAC_SCAN_IE_LEN; skb = mt76_mcu_msg_alloc(mdev, NULL, max_len); if (!skb) @@ -2869,8 +2919,8 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, if (i > MT7925_RNR_SCAN_MAX_BSSIDS) break; - ssid->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); - memcpy(ssid->ssids[i].ssid, sreq->ssids[i].ssid, + ssid->ssids[n_ssids].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len); + memcpy(ssid->ssids[n_ssids].ssid, sreq->ssids[i].ssid, sreq->ssids[i].ssid_len); n_ssids++; } @@ -2925,13 
+2975,6 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, } chan_info->channel_type = sreq->n_channels ? 4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } - req->scan_func |= SCAN_FUNC_SPLIT_SCAN; tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_MISC, sizeof(*misc)); @@ -2942,6 +2985,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, req->scan_func |= SCAN_FUNC_RANDOM_MAC; } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, &scan_req->ies); + err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); if (err < 0) @@ -2953,7 +2999,8 @@ EXPORT_SYMBOL_GPL(mt7925_mcu_hw_scan); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq) + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies) { struct mt76_vif_link *mvif = (struct mt76_vif_link *)vif->drv_priv; struct ieee80211_channel **scan_list = sreq->channels; @@ -3041,12 +3088,8 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, } chan_info->channel_type = sreq->n_channels ? 
4 : 0; - tlv = mt76_connac_mcu_add_tlv(skb, UNI_SCAN_IE, sizeof(*ie)); - ie = (struct scan_ie_tlv *)tlv; - if (sreq->ie_len > 0) { - memcpy(ie->ies, sreq->ie, sreq->ie_len); - ie->ies_len = cpu_to_le16(sreq->ie_len); - } + /* Append scan probe IEs as the last tlv */ + mt7925_mcu_build_scan_ie_tlv(mdev, skb, ies); return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h index ee6fb16e83c5..a40764d89a1f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.h @@ -269,7 +269,7 @@ struct scan_ie_tlv { __le16 ies_len; u8 band; u8 pad; - u8 ies[MT76_CONNAC_SCAN_IE_LEN]; + u8 ies[]; }; struct scan_misc_tlv { @@ -673,7 +673,8 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif); int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, struct ieee80211_vif *vif, - struct cfg80211_sched_scan_request *sreq); + struct cfg80211_sched_scan_request *sreq, + struct ieee80211_scan_ies *ies); int mt7925_mcu_sched_scan_enable(struct mt76_phy *phy, struct ieee80211_vif *vif, bool enable); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h index 547489092c29..341987e47f67 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/regs.h +++ b/drivers/net/wireless/mediatek/mt76/mt7925/regs.h @@ -58,7 +58,7 @@ #define MT_INT_TX_DONE_MCU (MT_INT_TX_DONE_MCU_WM | \ MT_INT_TX_DONE_FWDL) -#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU_WM | \ +#define MT_INT_TX_DONE_ALL (MT_INT_TX_DONE_MCU | \ MT_INT_TX_DONE_BAND0 | \ GENMASK(18, 4)) diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index a50c1723ca29..05130ec1e5f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -28,7 +28,7 @@ static const struct 
ieee80211_iface_combination if_comb[] = { }, }; -static const struct ieee80211_iface_limit if_limits_chanctx[] = { +static const struct ieee80211_iface_limit if_limits_chanctx_mcc[] = { { .max = 2, .types = BIT(NL80211_IFTYPE_STATION) | @@ -36,8 +36,23 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { }, { .max = 1, - .types = BIT(NL80211_IFTYPE_AP) | - BIT(NL80211_IFTYPE_P2P_GO) + .types = BIT(NL80211_IFTYPE_P2P_GO) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_P2P_DEVICE) + } +}; + +static const struct ieee80211_iface_limit if_limits_chanctx_scc[] = { + { + .max = 2, + .types = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_P2P_CLIENT) + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_AP) }, { .max = 1, @@ -47,11 +62,18 @@ static const struct ieee80211_iface_limit if_limits_chanctx[] = { static const struct ieee80211_iface_combination if_comb_chanctx[] = { { - .limits = if_limits_chanctx, - .n_limits = ARRAY_SIZE(if_limits_chanctx), + .limits = if_limits_chanctx_mcc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_mcc), .max_interfaces = 3, .num_different_channels = 2, .beacon_int_infra_match = false, + }, + { + .limits = if_limits_chanctx_scc, + .n_limits = ARRAY_SIZE(if_limits_chanctx_scc), + .max_interfaces = 3, + .num_different_channels = 1, + .beacon_int_infra_match = false, } }; diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c index 05978d9c7b91..3f1d9ba49076 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c @@ -142,10 +142,7 @@ struct mt76_wcid *mt792x_rx_get_wcid(struct mt792x_dev *dev, u16 idx, struct mt792x_sta *sta; struct mt76_wcid *wcid; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (unicast || !wcid) return wcid; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c 
b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 0dbd4662bc84..92148518f6a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -61,10 +61,7 @@ static struct mt76_wcid *mt7996_rx_get_wcid(struct mt7996_dev *dev, struct mt76_wcid *wcid; int i; - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - return NULL; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid) return NULL; @@ -1249,7 +1246,7 @@ mt7996_mac_tx_free(struct mt7996_dev *dev, void *data, int len) u16 idx; idx = FIELD_GET(MT_TXFREE_INFO_WLAN_ID, info); - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); sta = wcid_to_sta(wcid); if (!sta) goto next; @@ -1471,12 +1468,9 @@ static void mt7996_mac_add_txs(struct mt7996_dev *dev, void *data) if (pid < MT_PACKET_ID_NO_SKB) return; - if (wcidx >= mt7996_wtbl_size(dev)) - return; - rcu_read_lock(); - wcid = rcu_dereference(dev->mt76.wcid[wcidx]); + wcid = mt76_wcid_ptr(dev, wcidx); if (!wcid) goto out; @@ -2353,20 +2347,12 @@ void mt7996_mac_update_stats(struct mt7996_phy *phy) void mt7996_mac_sta_rc_work(struct work_struct *work) { struct mt7996_dev *dev = container_of(work, struct mt7996_dev, rc_work); - struct ieee80211_bss_conf *link_conf; - struct ieee80211_link_sta *link_sta; struct mt7996_sta_link *msta_link; - struct mt7996_vif_link *link; - struct mt76_vif_link *mlink; - struct ieee80211_sta *sta; struct ieee80211_vif *vif; - struct mt7996_sta *msta; struct mt7996_vif *mvif; LIST_HEAD(list); u32 changed; - u8 link_id; - rcu_read_lock(); spin_lock_bh(&dev->mt76.sta_poll_lock); list_splice_init(&dev->sta_rc_list, &list); @@ -2377,46 +2363,28 @@ void mt7996_mac_sta_rc_work(struct work_struct *work) changed = msta_link->changed; msta_link->changed = 0; - - sta = wcid_to_sta(&msta_link->wcid); - link_id = msta_link->wcid.link_id; - msta = msta_link->sta; - mvif = msta->vif; - vif = container_of((void *)mvif, struct ieee80211_vif, 
drv_priv); - - mlink = rcu_dereference(mvif->mt76.link[link_id]); - if (!mlink) - continue; - - link_sta = rcu_dereference(sta->link[link_id]); - if (!link_sta) - continue; - - link_conf = rcu_dereference(vif->link_conf[link_id]); - if (!link_conf) - continue; + mvif = msta_link->sta->vif; + vif = container_of((void *)mvif, struct ieee80211_vif, + drv_priv); spin_unlock_bh(&dev->mt76.sta_poll_lock); - link = (struct mt7996_vif_link *)mlink; - if (changed & (IEEE80211_RC_SUPP_RATES_CHANGED | IEEE80211_RC_NSS_CHANGED | IEEE80211_RC_BW_CHANGED)) - mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, msta_link, + mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + msta_link->wcid.link_id, true); if (changed & IEEE80211_RC_SMPS_CHANGED) - mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, NULL, + mt7996_mcu_set_fixed_field(dev, msta_link->sta, NULL, + msta_link->wcid.link_id, RATE_PARAM_MMPS_UPDATE); spin_lock_bh(&dev->mt76.sta_poll_lock); } spin_unlock_bh(&dev->mt76.sta_poll_lock); - rcu_read_unlock(); } void mt7996_mac_work(struct work_struct *work) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 78ae9f5cb176..07dd75ce94a5 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -1112,9 +1112,8 @@ mt7996_mac_sta_event(struct mt7996_dev *dev, struct ieee80211_vif *vif, if (err) return err; - err = mt7996_mcu_add_rate_ctrl(dev, vif, link_conf, - link_sta, link, - msta_link, false); + err = mt7996_mcu_add_rate_ctrl(dev, msta_link->sta, vif, + link_id, false); if (err) return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index f0adc0b4b8b6..994526c65bfc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -555,7 +555,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct 
sk_buff *skb) switch (le16_to_cpu(res->tag)) { case UNI_ALL_STA_TXRX_RATE: wlan_idx = le16_to_cpu(res->rate[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -565,7 +565,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_ADM_STAT: wlan_idx = le16_to_cpu(res->adm_stat[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -579,7 +579,7 @@ mt7996_mcu_rx_all_sta_info_event(struct mt7996_dev *dev, struct sk_buff *skb) break; case UNI_ALL_STA_TXRX_MSDU_COUNT: wlan_idx = le16_to_cpu(res->msdu_cnt[i].wlan_idx); - wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]); + wcid = mt76_wcid_ptr(dev, wlan_idx); if (!wcid) break; @@ -676,10 +676,7 @@ mt7996_mcu_wed_rro_event(struct mt7996_dev *dev, struct sk_buff *skb) e = (void *)skb->data; idx = le16_to_cpu(e->wlan_id); - if (idx >= ARRAY_SIZE(dev->mt76.wcid)) - break; - - wcid = rcu_dereference(dev->mt76.wcid[idx]); + wcid = mt76_wcid_ptr(dev, idx); if (!wcid || !wcid->sta) break; @@ -1905,22 +1902,35 @@ int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, MCU_WM_UNI_CMD(RA), true); } -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field) +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field) { - struct sta_phy_uni *phy = data; + struct mt7996_vif *mvif = msta->vif; + struct mt7996_sta_link *msta_link; struct sta_rec_ra_fixed_uni *ra; + struct sta_phy_uni *phy = data; + struct mt76_vif_link *mlink; struct sk_buff *skb; + int err = -ENODEV; struct tlv *tlv; - skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, + rcu_read_lock(); + + mlink = rcu_dereference(mvif->mt76.link[link_id]); + if (!mlink) + goto error_unlock; 
+ + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, mlink, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto error_unlock; + } tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA_UPDATE, sizeof(*ra)); ra = (struct sta_rec_ra_fixed_uni *)tlv; @@ -1935,106 +1945,149 @@ int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, if (phy) ra->phy = *phy; break; - case RATE_PARAM_MMPS_UPDATE: + case RATE_PARAM_MMPS_UPDATE: { + struct ieee80211_sta *sta = wcid_to_sta(&msta_link->wcid); + struct ieee80211_link_sta *link_sta; + + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) { + dev_kfree_skb(skb); + goto error_unlock; + } + ra->mmps_mode = mt7996_mcu_get_mmps_mode(link_sta->smps_mode); break; + } default: break; } ra->field = cpu_to_le32(field); + rcu_read_unlock(); + return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); +error_unlock: + rcu_read_unlock(); + + return err; } static int -mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link) +mt7996_mcu_add_rate_ctrl_fixed(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id) { - struct cfg80211_chan_def *chandef = &link->phy->mt76->chandef; - struct cfg80211_bitrate_mask *mask = &link->bitrate_mask; - enum nl80211_band band = chandef->chan->band; + struct ieee80211_link_sta *link_sta; + struct cfg80211_bitrate_mask mask; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; struct sta_phy_uni phy = {}; - int ret, nrates = 0; + struct ieee80211_sta *sta; + int ret, nrates = 0, idx; + enum nl80211_band band; + bool has_he; #define __sta_phy_bitrate_mask_check(_mcs, _gi, _ht, _he) \ do { \ - u8 i, gi = mask->control[band]._gi; \ + u8 i, gi = 
mask.control[band]._gi; \ gi = (_he) ? gi : gi == NL80211_TXRATE_FORCE_SGI; \ phy.sgi = gi; \ - phy.he_ltf = mask->control[band].he_ltf; \ - for (i = 0; i < ARRAY_SIZE(mask->control[band]._mcs); i++) { \ - if (!mask->control[band]._mcs[i]) \ + phy.he_ltf = mask.control[band].he_ltf; \ + for (i = 0; i < ARRAY_SIZE(mask.control[band]._mcs); i++) { \ + if (!mask.control[band]._mcs[i]) \ continue; \ - nrates += hweight16(mask->control[band]._mcs[i]); \ - phy.mcs = ffs(mask->control[band]._mcs[i]) - 1; \ + nrates += hweight16(mask.control[band]._mcs[i]); \ + phy.mcs = ffs(mask.control[band]._mcs[i]) - 1; \ if (_ht) \ phy.mcs += 8 * i; \ } \ } while (0) - if (link_sta->he_cap.has_he) { + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + band = link->phy->mt76->chandef.chan->band; + has_he = link_sta->he_cap.has_he; + mask = link->bitrate_mask; + idx = msta_link->wcid.idx; + + if (has_he) { __sta_phy_bitrate_mask_check(he_mcs, he_gi, 0, 1); } else if (link_sta->vht_cap.vht_supported) { __sta_phy_bitrate_mask_check(vht_mcs, gi, 0, 0); } else if (link_sta->ht_cap.ht_supported) { __sta_phy_bitrate_mask_check(ht_mcs, gi, 1, 0); } else { - nrates = hweight32(mask->control[band].legacy); - phy.mcs = ffs(mask->control[band].legacy) - 1; + nrates = hweight32(mask.control[band].legacy); + phy.mcs = ffs(mask.control[band].legacy) - 1; } + + rcu_read_unlock(); + #undef __sta_phy_bitrate_mask_check /* fall back to auto rate control */ - if (mask->control[band].gi == NL80211_TXRATE_DEFAULT_GI && - mask->control[band].he_gi == GENMASK(7, 0) && - mask->control[band].he_ltf == GENMASK(7, 0) && + if (mask.control[band].gi == NL80211_TXRATE_DEFAULT_GI && + mask.control[band].he_gi == GENMASK(7, 0) && + 
mask.control[band].he_ltf == GENMASK(7, 0) && nrates != 1) return 0; /* fixed single rate */ if (nrates == 1) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_MCS); if (ret) return ret; } /* fixed GI */ - if (mask->control[band].gi != NL80211_TXRATE_DEFAULT_GI || - mask->control[band].he_gi != GENMASK(7, 0)) { + if (mask.control[band].gi != NL80211_TXRATE_DEFAULT_GI || + mask.control[band].he_gi != GENMASK(7, 0)) { u32 addr; /* firmware updates only TXCMD but doesn't take WTBL into * account, so driver should update here to reflect the * actual txrate hardware sends out. */ - addr = mt7996_mac_wtbl_lmac_addr(dev, msta_link->wcid.idx, 7); - if (link_sta->he_cap.has_he) + addr = mt7996_mac_wtbl_lmac_addr(dev, idx, 7); + if (has_he) mt76_rmw_field(dev, addr, GENMASK(31, 24), phy.sgi); else mt76_rmw_field(dev, addr, GENMASK(15, 12), phy.sgi); - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_GI); if (ret) return ret; } /* fixed HE_LTF */ - if (mask->control[band].he_ltf != GENMASK(7, 0)) { - ret = mt7996_mcu_set_fixed_field(dev, link_sta, link, - msta_link, &phy, + if (mask.control[band].he_ltf != GENMASK(7, 0)) { + ret = mt7996_mcu_set_fixed_field(dev, msta, &phy, link_id, RATE_PARAM_FIXED_HE_LTF); if (ret) return ret; } return 0; + +error_unlock: + rcu_read_unlock(); + + return -ENODEV; } static void @@ -2145,21 +2198,44 @@ mt7996_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7996_dev *dev, memset(ra->rx_rcpi, INIT_RCPI, sizeof(ra->rx_rcpi)); } -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed) +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta 
*msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed) { + struct ieee80211_bss_conf *link_conf; + struct ieee80211_link_sta *link_sta; + struct mt7996_sta_link *msta_link; + struct mt7996_vif_link *link; + struct ieee80211_sta *sta; struct sk_buff *skb; - int ret; + int ret = -ENODEV; + + rcu_read_lock(); + + link = mt7996_vif_link(dev, vif, link_id); + if (!link) + goto error_unlock; + + msta_link = rcu_dereference(msta->link[link_id]); + if (!msta_link) + goto error_unlock; + + sta = wcid_to_sta(&msta_link->wcid); + link_sta = rcu_dereference(sta->link[link_id]); + if (!link_sta) + goto error_unlock; + + link_conf = rcu_dereference(vif->link_conf[link_id]); + if (!link_conf) + goto error_unlock; skb = __mt76_connac_mcu_alloc_sta_req(&dev->mt76, &link->mt76, &msta_link->wcid, MT7996_STA_UPDATE_MAX_SIZE); - if (IS_ERR(skb)) - return PTR_ERR(skb); + if (IS_ERR(skb)) { + ret = PTR_ERR(skb); + goto error_unlock; + } /* firmware rc algorithm refers to sta_rec_he for HE control. * once dev->rc_work changes the settings driver should also @@ -2173,12 +2249,19 @@ int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, */ mt7996_mcu_sta_rate_ctrl_tlv(skb, dev, vif, link_conf, link_sta, link); + rcu_read_unlock(); + ret = mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), true); if (ret) return ret; - return mt7996_mcu_add_rate_ctrl_fixed(dev, link_sta, link, msta_link); + return mt7996_mcu_add_rate_ctrl_fixed(dev, msta, vif, link_id); + +error_unlock: + rcu_read_unlock(); + + return ret; } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 1ad6bc046f7c..33ac16b64ef1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -620,23 +620,17 @@ int mt7996_mcu_beacon_inband_discov(struct mt7996_dev *dev, int mt7996_mcu_add_obss_spr(struct mt7996_phy *phy, struct mt7996_vif_link *link, struct 
ieee80211_he_obss_pd *he_obss_pd); -int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, - struct ieee80211_vif *vif, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, bool changed); +int mt7996_mcu_add_rate_ctrl(struct mt7996_dev *dev, struct mt7996_sta *msta, + struct ieee80211_vif *vif, u8 link_id, + bool changed); int mt7996_set_channel(struct mt76_phy *mphy); int mt7996_mcu_set_chan_info(struct mt7996_phy *phy, u16 tag); int mt7996_mcu_set_tx(struct mt7996_dev *dev, struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); int mt7996_mcu_set_fixed_rate_ctrl(struct mt7996_dev *dev, void *data, u16 version); -int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, - struct ieee80211_link_sta *link_sta, - struct mt7996_vif_link *link, - struct mt7996_sta_link *msta_link, - void *data, u32 field); +int mt7996_mcu_set_fixed_field(struct mt7996_dev *dev, struct mt7996_sta *msta, + void *data, u8 link_id, u32 field); int mt7996_mcu_set_eeprom(struct mt7996_dev *dev); int mt7996_mcu_get_eeprom(struct mt7996_dev *dev, u32 offset, u8 *buf, u32 buf_len); int mt7996_mcu_get_eeprom_free_block(struct mt7996_dev *dev, u8 *block_num); diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 513916469ca2..e6cf16706667 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -64,7 +64,7 @@ mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list) struct mt76_tx_cb *cb = mt76_tx_skb_cb(skb); struct mt76_wcid *wcid; - wcid = rcu_dereference(dev->wcid[cb->wcid]); + wcid = __mt76_wcid_ptr(dev, cb->wcid); if (wcid) { status.sta = wcid_to_sta(wcid); if (status.sta && (wcid->rate.flags || wcid->rate.legacy)) { @@ -251,9 +251,7 @@ void __mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff * rcu_read_lock(); - if (wcid_idx < ARRAY_SIZE(dev->wcid)) - wcid = 
rcu_dereference(dev->wcid[wcid_idx]); - + wcid = __mt76_wcid_ptr(dev, wcid_idx); mt76_tx_check_non_aql(dev, wcid, skb); #ifdef CONFIG_NL80211_TESTMODE @@ -538,7 +536,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid) break; mtxq = (struct mt76_txq *)txq->drv_priv; - wcid = rcu_dereference(dev->wcid[mtxq->wcid]); + wcid = __mt76_wcid_ptr(dev, mtxq->wcid); if (!wcid || test_bit(MT_WCID_FLAG_PS, &wcid->flags)) continue; @@ -617,7 +615,8 @@ mt76_txq_schedule_pending_wcid(struct mt76_phy *phy, struct mt76_wcid *wcid, if ((dev->drv->drv_flags & MT_DRV_HW_MGMT_TXQ) && !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) && !ieee80211_is_data(hdr->frame_control) && - !ieee80211_is_bufferable_mmpdu(skb)) + (!ieee80211_is_bufferable_mmpdu(skb) || + ieee80211_is_deauth(hdr->frame_control))) qid = MT_TXQ_PSD; q = phy->q_tx[qid]; diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index 95b3dc96e4c4..97249ebb4bc8 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -83,7 +83,7 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev, u8 phy_idx) if (!(mask & 1)) continue; - wcid = rcu_dereference(dev->wcid[j]); + wcid = __mt76_wcid_ptr(dev, j); if (!wcid || wcid->phy_idx != phy_idx) continue; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c index eface610178d..f7f3a2340c39 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.c @@ -108,7 +108,7 @@ exit_free_device: } EXPORT_SYMBOL_GPL(rt2x00soc_probe); -int rt2x00soc_remove(struct platform_device *pdev) +void rt2x00soc_remove(struct platform_device *pdev) { struct ieee80211_hw *hw = platform_get_drvdata(pdev); struct rt2x00_dev *rt2x00dev = hw->priv; @@ -119,8 +119,6 @@ int rt2x00soc_remove(struct platform_device *pdev) rt2x00lib_remove_dev(rt2x00dev); rt2x00soc_free_reg(rt2x00dev); ieee80211_free_hw(hw); 
- - return 0; } EXPORT_SYMBOL_GPL(rt2x00soc_remove); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h index 021fd06b3627..d6226b8a10e0 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00soc.h @@ -17,7 +17,7 @@ * SoC driver handlers. */ int rt2x00soc_probe(struct platform_device *pdev, const struct rt2x00_ops *ops); -int rt2x00soc_remove(struct platform_device *pdev); +void rt2x00soc_remove(struct platform_device *pdev); #ifdef CONFIG_PM int rt2x00soc_suspend(struct platform_device *pdev, pm_message_t state); int rt2x00soc_resume(struct platform_device *pdev); diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 9653dbaac3c0..781510a3ec6d 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -583,7 +583,11 @@ void zd_mac_tx_to_dev(struct sk_buff *skb, int error) skb_queue_tail(q, skb); while (skb_queue_len(q) > ZD_MAC_MAX_ACK_WAITERS) { - zd_mac_tx_status(hw, skb_dequeue(q), + skb = skb_dequeue(q); + if (!skb) + break; + + zd_mac_tx_status(hw, skb, mac->ack_pending ? 
mac->ack_signal : 0, NULL); mac->ack_pending = 0; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 92697f98c601..7493e5aa984c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -386,7 +386,7 @@ static void nvme_log_err_passthru(struct request *req) nr->cmd->common.cdw12, nr->cmd->common.cdw13, nr->cmd->common.cdw14, - nr->cmd->common.cdw14); + nr->cmd->common.cdw15); } enum nvme_disposition { @@ -2015,21 +2015,41 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, } -static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns, - struct nvme_id_ns *id, struct queue_limits *lim, - u32 bs, u32 atomic_bs) +static u32 nvme_configure_atomic_write(struct nvme_ns *ns, + struct nvme_id_ns *id, struct queue_limits *lim, u32 bs) { - unsigned int boundary = 0; + u32 atomic_bs, boundary = 0; - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) { - if (le16_to_cpu(id->nabspf)) + /* + * We do not support an offset for the atomic boundaries. + */ + if (id->nabo) + return bs; + + if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) { + /* + * Use the per-namespace atomic write unit when available. + */ + atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; + if (id->nabspf) boundary = (le16_to_cpu(id->nabspf) + 1) * bs; + } else { + /* + * Use the controller wide atomic write unit. This sucks + * because the limit is defined in terms of logical blocks while + * namespaces can have different formats, and because there is + * no clear language in the specification prohibiting different + * values for different controllers in the subsystem. 
+ */ + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; } + lim->atomic_write_hw_max = atomic_bs; lim->atomic_write_hw_boundary = boundary; lim->atomic_write_hw_unit_min = bs; lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs); lim->features |= BLK_FEAT_ATOMIC_WRITES; + return atomic_bs; } static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl) @@ -2067,34 +2087,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, valid = false; } - atomic_bs = phys_bs = bs; - if (id->nabo == 0) { - /* - * Bit 1 indicates whether NAWUPF is defined for this namespace - * and whether it should be used instead of AWUPF. If NAWUPF == - * 0 then AWUPF must be used instead. - */ - if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) - atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; - else - atomic_bs = (1 + ns->ctrl->awupf) * bs; - - /* - * Set subsystem atomic bs. - */ - if (ns->ctrl->subsys->atomic_bs) { - if (atomic_bs != ns->ctrl->subsys->atomic_bs) { - dev_err_ratelimited(ns->ctrl->device, - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", - ns->disk ? ns->disk->disk_name : "?", - ns->ctrl->subsys->atomic_bs, - atomic_bs); - } - } else - ns->ctrl->subsys->atomic_bs = atomic_bs; - - nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); - } + phys_bs = bs; + atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs); if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ @@ -2382,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; - /* - * Validate the max atomic write size fits within the subsystem's - * atomic write capabilities. 
- */ - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { - blk_mq_unfreeze_queue(ns->disk->queue, memflags); - ret = -ENXIO; - goto out; - } - nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3215,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) memcpy(subsys->model, id->mn, sizeof(subsys->model)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; + subsys->awupf = le16_to_cpu(id->awupf); /* Versions prior to 1.4 don't necessarily report a valid type */ if (id->cntrltype == NVME_CTRL_DISC || @@ -3552,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret) goto out_free; } + + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) { + dev_err_ratelimited(ctrl->device, + "inconsistent AWUPF, controller not added (%u/%u).\n", + le16_to_cpu(id->awupf), ctrl->subsys->awupf); + ret = -EINVAL; + goto out_free; + } + memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); @@ -3647,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -4036,6 +4029,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); + +#ifdef CONFIG_NVME_MULTIPATH + cancel_delayed_work(&head->remove_work); +#endif return 0; out_put_ns_head: @@ -4089,6 +4086,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) struct nvme_ns *ns; struct gendisk *disk; int node = ctrl->numa_node; + bool last_path = false; ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); if (!ns) @@ -4181,9 +4179,22 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct 
nvme_ns_info *info) out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) + if (list_empty(&ns->head->list)) { list_del_init(&ns->head->entry); + /* + * If multipath is not configured, we still create a namespace + * head (nshead), but head->disk is not initialized in that + * case. As a result, only a single reference to nshead is held + * (via kref_init()) when it is created. Therefore, ensure that + * we do not release the reference to nshead twice if head->disk + * is not present. + */ + if (ns->head->disk) + last_path = true; + } mutex_unlock(&ctrl->subsys->lock); + if (last_path) + nvme_put_ns_head(ns->head); nvme_put_ns_head(ns->head); out_cleanup_disk: put_disk(disk); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e040e467f9fa..3da980dc60d9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -690,8 +690,8 @@ static void nvme_remove_head(struct nvme_ns_head *head) nvme_cdev_del(&head->cdev, &head->cdev_device); synchronize_srcu(&head->srcu); del_gendisk(head->disk); - nvme_put_ns_head(head); } + nvme_put_ns_head(head); } static void nvme_remove_head_work(struct work_struct *work) @@ -1200,7 +1200,8 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) */ srcu_idx = srcu_read_lock(&head->srcu); - list_for_each_entry_rcu(ns, &head->list, siblings) { + list_for_each_entry_srcu(ns, &head->list, siblings, + srcu_read_lock_held(&head->srcu)) { /* * Ensure that ns path disk node is already added otherwise we * may get invalid kobj name for target @@ -1291,6 +1292,9 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { bool remove = false; + if (!head->disk) + return; + mutex_lock(&head->subsys->lock); /* * We are called when all paths have been removed, and at that point @@ -1311,7 +1315,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) */ if (!try_module_get(THIS_MODULE)) goto out; - queue_delayed_work(nvme_wq, 
&head->remove_work, + mod_delayed_work(nvme_wq, &head->remove_work, head->delayed_removal_secs * HZ); } else { list_del_init(&head->entry); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a468cdc5b5cb..7df2ea21851f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,7 +410,6 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; - u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -443,11 +442,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; + u16 awupf; /* 0's based value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif - u32 atomic_bs; }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8ff12e415cb5..320aaa41ec39 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2101,8 +2101,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) pci_p2pmem_publish(pdev, true); - - nvme_update_attrs(dev); } static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) @@ -3010,6 +3008,8 @@ static void nvme_reset_work(struct work_struct *work) if (result < 0) goto out; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out; @@ -3343,6 +3343,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result < 0) goto out_disable; + nvme_update_attrs(dev); + result = nvme_setup_io_queues(dev); if (result) goto out_disable; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index df69a9dee71c..51df72f5e89b 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -867,6 +867,8 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) { if (bio != &req->b.inline_bio) bio_put(bio); + else + bio_uninit(bio); } #ifdef 
CONFIG_NVME_TARGET_TCP_TLS diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c index b0992325dd65..b37052863847 100644 --- a/drivers/pci/controller/pci-host-common.c +++ b/drivers/pci/controller/pci-host-common.c @@ -64,13 +64,13 @@ int pci_host_common_init(struct platform_device *pdev, of_pci_check_probe_only(); + platform_set_drvdata(pdev, bridge); + /* Parse and map our Configuration Space windows */ cfg = gen_pci_init(dev, bridge, ops); if (IS_ERR(cfg)) return PTR_ERR(cfg); - platform_set_drvdata(pdev, bridge); - bridge->sysdata = cfg; bridge->ops = (struct pci_ops *)&ops->pci_ops; bridge->enable_device = ops->enable_device; diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 77fe73976654..0380d300adca 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -187,6 +187,7 @@ struct apple_pcie { const struct hw_info *hw; unsigned long *bitmap; struct list_head ports; + struct list_head entry; struct completion event; struct irq_fwspec fwspec; u32 nvecs; @@ -205,6 +206,9 @@ struct apple_pcie_port { int idx; }; +static LIST_HEAD(pcie_list); +static DEFINE_MUTEX(pcie_list_lock); + static void rmw_set(u32 set, void __iomem *addr) { writel_relaxed(readl_relaxed(addr) | set, addr); @@ -720,13 +724,45 @@ static int apple_msi_init(struct apple_pcie *pcie) return 0; } +static void apple_pcie_register(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_add_tail(&pcie->entry, &pcie_list); +} + +static void apple_pcie_unregister(struct apple_pcie *pcie) +{ + guard(mutex)(&pcie_list_lock); + + list_del(&pcie->entry); +} + +static struct apple_pcie *apple_pcie_lookup(struct device *dev) +{ + struct apple_pcie *pcie; + + guard(mutex)(&pcie_list_lock); + + list_for_each_entry(pcie, &pcie_list, entry) { + if (pcie->dev == dev) + return pcie; + } + + return NULL; +} + static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev) { 
struct pci_config_window *cfg = pdev->sysdata; - struct apple_pcie *pcie = cfg->priv; + struct apple_pcie *pcie; struct pci_dev *port_pdev; struct apple_pcie_port *port; + pcie = apple_pcie_lookup(cfg->parent); + if (WARN_ON(!pcie)) + return NULL; + /* Find the root port this device is on */ port_pdev = pcie_find_root_port(pdev); @@ -806,10 +842,14 @@ static void apple_pcie_disable_device(struct pci_host_bridge *bridge, struct pci static int apple_pcie_init(struct pci_config_window *cfg) { - struct apple_pcie *pcie = cfg->priv; struct device *dev = cfg->parent; + struct apple_pcie *pcie; int ret; + pcie = apple_pcie_lookup(dev); + if (WARN_ON(!pcie)) + return -ENOENT; + for_each_available_child_of_node_scoped(dev->of_node, of_port) { ret = apple_pcie_setup_port(pcie, of_port); if (ret) { @@ -852,13 +892,18 @@ static int apple_pcie_probe(struct platform_device *pdev) mutex_init(&pcie->lock); INIT_LIST_HEAD(&pcie->ports); - dev_set_drvdata(dev, pcie); ret = apple_msi_init(pcie); if (ret) return ret; - return pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + apple_pcie_register(pcie); + + ret = pci_host_common_init(pdev, &apple_pcie_cfg_ecam_ops); + if (ret) + apple_pcie_unregister(pcie); + + return ret; } static const struct of_device_id apple_pcie_of_match[] = { diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index 2c5e6446e00e..260b7de2dbd5 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -84,8 +84,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev, goto err_exit_iomap; } - cfg->priv = dev_get_drvdata(dev); - if (ops->init) { err = ops->init(cfg); if (err) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index ebd342bda235..91d2d92717d9 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -771,7 +771,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; if (!ctrl->inband_presence_disabled) - ignored_events |= events 
& PCI_EXP_SLTSTA_PDC; + ignored_events |= PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6ede55a7c5e6..d686488f4111 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -934,10 +934,12 @@ int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) if (!pdev->msix_enabled) return -ENXIO; - guard(msi_descs_lock)(&pdev->dev); virq = msi_get_virq(&pdev->dev, index); if (!virq) return -ENXIO; + + guard(msi_descs_lock)(&pdev->dev); + /* * This is a horrible hack, but short of implementing a PCI * specific interrupt chip callback and a huge pile of diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index b78e0e417324..af370628e583 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,19 +1676,24 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) - goto free_ri; + if (!root_ops) { + kfree(ri); + return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) - goto free_root_ops; + if (!ri->cfg) { + kfree(ri); + kfree(root_ops); + return NULL; + } root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - goto free_cfg; + return NULL; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1705,14 +1710,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; - -free_cfg: - pci_ecam_free(ri->cfg); -free_root_ops: - kfree(root_ops); -free_ri: - kfree(ri); - return NULL; } void pcibios_add_bus(struct pci_bus *bus) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 
e9448d55113b..9e42090fb108 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3217,14 +3217,14 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - return; + goto poweron; /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { pci_err(dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - return; + goto poweron; } dev->pm_cap = pm; @@ -3269,6 +3269,7 @@ void pci_pm_init(struct pci_dev *dev) pci_read_config_word(dev, PCI_STATUS, &status); if (status & PCI_STATUS_IMM_READY) dev->imm_ready = 1; +poweron: pci_pm_power_up_and_verify_state(dev); pm_runtime_forbid(&dev->dev); pm_runtime_set_active(&dev->dev); diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index ee5f615a9023..4bd73f038ffb 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -254,6 +254,7 @@ bool pcie_ptm_enabled(struct pci_dev *dev) } EXPORT_SYMBOL(pcie_ptm_enabled); +#if IS_ENABLED(CONFIG_DEBUG_FS) static ssize_t context_update_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { @@ -552,3 +553,4 @@ void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) debugfs_remove_recursive(ptm_debugfs->debugfs); } EXPORT_SYMBOL_GPL(pcie_ptm_destroy_debugfs); +#endif diff --git a/drivers/pinctrl/nuvoton/pinctrl-ma35.c b/drivers/pinctrl/nuvoton/pinctrl-ma35.c index 06ae1fe8b8c5..b51704bafd81 100644 --- a/drivers/pinctrl/nuvoton/pinctrl-ma35.c +++ b/drivers/pinctrl/nuvoton/pinctrl-ma35.c @@ -1074,7 +1074,10 @@ static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct u32 idx = 0; int ret; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + npctl->nfunctions++; npctl->ngroups += of_get_child_count(to_of_node(child)); } @@ -1092,7 +1095,10 @@ 
static int ma35_pinctrl_probe_dt(struct platform_device *pdev, struct ma35_pinct if (!npctl->groups) return -ENOMEM; - for_each_gpiochip_node(dev, child) { + device_for_each_child_node(dev, child) { + if (fwnode_property_present(child, "gpio-controller")) + continue; + ret = ma35_pinctrl_parse_functions(child, npctl, idx++); if (ret) { fwnode_handle_put(child); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5cf3db6d78b7..b3f0d02aeeb3 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -979,6 +979,17 @@ static int amd_gpio_suspend_hibernate_common(struct device *dev, bool is_suspend pin, is_suspend ? "suspend" : "hibernate"); } + /* + * debounce enabled over suspend has shown issues with a GPIO + * being unable to wake the system, as we're only interested in + * the actual wakeup event, clear it. + */ + if (gpio_dev->saved_regs[i] & (DB_CNTRl_MASK << DB_CNTRL_OFF)) { + amd_gpio_set_debounce(gpio_dev, pin, 0); + pm_pr_dbg("Clearing debounce for GPIO #%d during %s.\n", + pin, is_suspend ? 
"suspend" : "hibernate"); + } + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } diff --git a/drivers/pinctrl/pinctrl-aw9523.c b/drivers/pinctrl/pinctrl-aw9523.c index 9bf53de20be8..04afb344e9e5 100644 --- a/drivers/pinctrl/pinctrl-aw9523.c +++ b/drivers/pinctrl/pinctrl-aw9523.c @@ -784,7 +784,7 @@ static int aw9523_init_gpiochip(struct aw9523 *awi, unsigned int npins) gc->set_config = gpiochip_generic_config; gc->parent = dev; gc->owner = THIS_MODULE; - gc->can_sleep = false; + gc->can_sleep = true; return 0; } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 5c4687de1464..f713c80d7f3e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1038,6 +1038,25 @@ static bool msm_gpio_needs_dual_edge_parent_workaround(struct irq_data *d, test_bit(d->hwirq, pctrl->skip_wake_irqs); } +static void msm_gpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, + unsigned int ngpios) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + const struct msm_pingroup *g; + int i; + + bitmap_fill(valid_mask, ngpios); + + for (i = 0; i < ngpios; i++) { + g = &pctrl->soc->groups[i]; + + if (g->intr_detection_width != 1 && + g->intr_detection_width != 2) + clear_bit(i, valid_mask); + } +} + static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1441,6 +1460,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; + girq->init_valid_mask = msm_gpio_irq_init_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 900069eb186e..a1c529f1ff1a 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -715,7 +715,7 @@ static const struct 
mlxbf_pmc_events mlxbf_pmc_llt_events[] = { {101, "GDC_BANK0_HIT_DCL_PARTIAL"}, {102, "GDC_BANK0_EVICT_DCL"}, {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA0"}, - {103, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, + {104, "GDC_BANK0_G_RSE_PIPE_CACHE_DATA1"}, {105, "GDC_BANK0_ARB_STRB"}, {106, "GDC_BANK0_ARB_WAIT"}, {107, "GDC_BANK0_GGA_STRB"}, diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c index aae99adb29eb..14aa87b39be5 100644 --- a/drivers/platform/mellanox/mlxbf-tmfifo.c +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c @@ -281,7 +281,8 @@ static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo, vring->align = SMP_CACHE_BYTES; vring->index = i; vring->vdev_id = tm_vdev->vdev.id.device; - vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN; + vring->drop_desc.len = cpu_to_virtio32(&tm_vdev->vdev, + VRING_DROP_DESC_MAX_LEN); dev = &tm_vdev->vdev.dev; size = vring_size(vring->num, vring->align); @@ -1287,7 +1288,7 @@ static void mlxbf_tmfifo_get_cfg_mac(u8 *mac) ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac); } -/* Set TmFifo thresolds which is used to trigger interrupts. */ +/* Set TmFifo thresholds which is used to trigger interrupts. 
*/ static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo) { u64 ctl; diff --git a/drivers/platform/mellanox/mlxreg-dpu.c b/drivers/platform/mellanox/mlxreg-dpu.c index 52260106a9f1..39f89c47144a 100644 --- a/drivers/platform/mellanox/mlxreg-dpu.c +++ b/drivers/platform/mellanox/mlxreg-dpu.c @@ -483,7 +483,7 @@ static int mlxreg_dpu_config_init(struct mlxreg_dpu *mlxreg_dpu, void *regmap, mlxreg_dpu->io_data, sizeof(*mlxreg_dpu->io_data)); if (IS_ERR(mlxreg_dpu->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); return PTR_ERR(mlxreg_dpu->io_regs); diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index aee395bb48ae..d1518598dfed 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -57,9 +57,9 @@ enum mlxreg_lc_state { * @dev: platform device; * @lock: line card lock; * @par_regmap: parent device regmap handle; - * @data: pltaform core data; + * @data: platform core data; * @io_data: register access platform data; - * @led_data: LED platform data ; + * @led_data: LED platform data; * @mux_data: MUX platform data; * @led: LED device; * @io_regs: register access device; @@ -171,7 +171,7 @@ static int mlxreg_lc_chan[] = { 0x4e, 0x4f }; -/* Defaul mux configuration. */ +/* Default mux configuration. */ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { { .chan_ids = mlxreg_lc_chan, @@ -181,7 +181,7 @@ static struct mlxcpld_mux_plat_data mlxreg_lc_mux_data[] = { }, }; -/* Defaul mux board info. */ +/* Default mux board info. 
*/ static struct i2c_board_info mlxreg_lc_mux_brdinfo = { I2C_BOARD_INFO("i2c-mux-mlxcpld", 0x32), }; @@ -688,7 +688,7 @@ static int mlxreg_lc_completion_notify(void *handle, struct i2c_adapter *parent, if (regval & mlxreg_lc->data->mask) { mlxreg_lc->state |= MLXREG_LC_SYNCED; mlxreg_lc_state_update_locked(mlxreg_lc, MLXREG_LC_SYNCED, 1); - if (mlxreg_lc->state & ~MLXREG_LC_POWERED) { + if (!(mlxreg_lc->state & MLXREG_LC_POWERED)) { err = mlxreg_lc_power_on_off(mlxreg_lc, 1); if (err) goto mlxreg_lc_regmap_power_on_off_fail; @@ -758,7 +758,7 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, platform_device_register_resndata(dev, "mlxreg-io", data->hpdev.nr, NULL, 0, mlxreg_lc->io_data, sizeof(*mlxreg_lc->io_data)); if (IS_ERR(mlxreg_lc->io_regs)) { - dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + dev_err(dev, "Failed to create region for client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); err = PTR_ERR(mlxreg_lc->io_regs); diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index db31c8bf2255..51504113c17e 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -1181,7 +1181,7 @@ static int nvsw_sn2201_i2c_completion_notify(void *handle, int id) if (!nvsw_sn2201->main_mux_devs->adapter) { err = -ENODEV; dev_err(nvsw_sn2201->dev, "Failed to get adapter for bus %d\n", - nvsw_sn2201->cpld_devs->nr); + nvsw_sn2201->main_mux_devs->nr); goto i2c_get_adapter_main_fail; } diff --git a/drivers/platform/x86/amd/amd_isp4.c b/drivers/platform/x86/amd/amd_isp4.c index 0cc01441bcbb..0d494899502c 100644 --- a/drivers/platform/x86/amd/amd_isp4.c +++ b/drivers/platform/x86/amd/amd_isp4.c @@ -11,6 +11,7 @@ #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/property.h> +#include <linux/soc/amd/isp4_misc.h> #include <linux/string.h> #include <linux/types.h> 
#include <linux/units.h> @@ -20,6 +21,9 @@ #define AMDISP_OV05C10_REMOTE_EP_NAME "ov05c10_isp_4_1_1" #define AMD_ISP_PLAT_DRV_NAME "amd-isp4" +static const struct software_node isp4_mipi1_endpoint_node; +static const struct software_node ov05c10_endpoint_node; + /* * AMD ISP platform info definition to initialize sensor * specific platform configuration to prepare the amdisp @@ -42,55 +46,116 @@ struct amdisp_platform { struct mutex lock; /* protects i2c client creation */ }; -/* Top-level OV05C10 camera node property table */ +/* Root AMD CAMERA SWNODE */ + +/* Root amd camera node definition */ +static const struct software_node amd_camera_node = { + .name = "amd_camera", +}; + +/* ISP4 SWNODE */ + +/* ISP4 OV05C10 camera node definition */ +static const struct software_node isp4_node = { + .name = "isp4", + .parent = &amd_camera_node, +}; + +/* + * ISP4 Ports node definition. No properties defined for + * ports node. + */ +static const struct software_node isp4_ports = { + .name = "ports", + .parent = &isp4_node, +}; + +/* + * ISP4 Port node definition. No properties defined for + * port node. + */ +static const struct software_node isp4_port_node = { + .name = "port@0", + .parent = &isp4_ports, +}; + +/* + * ISP4 MIPI1 remote endpoint points to OV05C10 endpoint + * node. 
+ */ +static const struct software_node_ref_args isp4_refs[] = { + SOFTWARE_NODE_REFERENCE(&ov05c10_endpoint_node), +}; + +/* ISP4 MIPI1 endpoint node properties table */ +static const struct property_entry isp4_mipi1_endpoint_props[] = { + PROPERTY_ENTRY_REF_ARRAY("remote-endpoint", isp4_refs), + { } +}; + +/* ISP4 MIPI1 endpoint node definition */ +static const struct software_node isp4_mipi1_endpoint_node = { + .name = "endpoint", + .parent = &isp4_port_node, + .properties = isp4_mipi1_endpoint_props, +}; + +/* I2C1 SWNODE */ + +/* I2C1 camera node property table */ +static const struct property_entry i2c1_camera_props[] = { + PROPERTY_ENTRY_U32("clock-frequency", 1 * HZ_PER_MHZ), + { } +}; + +/* I2C1 camera node definition */ +static const struct software_node i2c1_node = { + .name = "i2c1", + .parent = &amd_camera_node, + .properties = i2c1_camera_props, +}; + +/* I2C1 camera node property table */ static const struct property_entry ov05c10_camera_props[] = { PROPERTY_ENTRY_U32("clock-frequency", 24 * HZ_PER_MHZ), { } }; -/* Root AMD ISP OV05C10 camera node definition */ -static const struct software_node camera_node = { +/* OV05C10 camera node definition */ +static const struct software_node ov05c10_camera_node = { .name = AMDISP_OV05C10_HID, + .parent = &i2c1_node, .properties = ov05c10_camera_props, }; /* - * AMD ISP OV05C10 Ports node definition. No properties defined for + * OV05C10 Ports node definition. No properties defined for * ports node for OV05C10. */ -static const struct software_node ports = { +static const struct software_node ov05c10_ports = { .name = "ports", - .parent = &camera_node, -}; - -/* - * AMD ISP OV05C10 Port node definition. No properties defined for - * port node for OV05C10. - */ -static const struct software_node port_node = { - .name = "port@", - .parent = &ports, + .parent = &ov05c10_camera_node, }; /* - * Remote endpoint AMD ISP node definition. No properties defined for - * remote endpoint node for OV05C10. 
+ * OV05C10 Port node definition. */ -static const struct software_node remote_ep_isp_node = { - .name = AMDISP_OV05C10_REMOTE_EP_NAME, +static const struct software_node ov05c10_port_node = { + .name = "port@0", + .parent = &ov05c10_ports, }; /* - * Remote endpoint reference for isp node included in the - * OV05C10 endpoint. + * OV05C10 remote endpoint points to ISP4 MIPI1 endpoint + * node. */ static const struct software_node_ref_args ov05c10_refs[] = { - SOFTWARE_NODE_REFERENCE(&remote_ep_isp_node), + SOFTWARE_NODE_REFERENCE(&isp4_mipi1_endpoint_node), }; /* OV05C10 supports one single link frequency */ static const u64 ov05c10_link_freqs[] = { - 925 * HZ_PER_MHZ, + 900 * HZ_PER_MHZ, }; /* OV05C10 supports only 2-lane configuration */ @@ -110,27 +175,64 @@ static const struct property_entry ov05c10_endpoint_props[] = { { } }; -/* AMD ISP endpoint node definition */ -static const struct software_node endpoint_node = { +/* OV05C10 endpoint node definition */ +static const struct software_node ov05c10_endpoint_node = { .name = "endpoint", - .parent = &port_node, + .parent = &ov05c10_port_node, .properties = ov05c10_endpoint_props, }; /* - * AMD ISP swnode graph uses 5 nodes and also its relationship is - * fixed to align with the structure that v4l2 expects for successful - * endpoint fwnode parsing. + * AMD Camera swnode graph uses 10 nodes and also its relationship is + * fixed to align with the structure that v4l2 and i2c frameworks expects + * for successful parsing of fwnodes and its properties with standard names. * * It is only the node property_entries that will vary for each platform * supporting different sensor modules. 
+ * + * AMD ISP4 SWNODE GRAPH Structure + * + * amd_camera { + * isp4 { + * ports { + * port@0 { + * isp4_mipi1_ep: endpoint { + * remote-endpoint = &OMNI5C10_ep; + * }; + * }; + * }; + * }; + * + * i2c1 { + * clock-frequency = 1 MHz; + * OMNI5C10 { + * clock-frequency = 24MHz; + * ports { + * port@0 { + * OMNI5C10_ep: endpoint { + * bus-type = 4; + * data-lanes = <1 2>; + * link-frequencies = 900MHz; + * remote-endpoint = &isp4_mipi1; + * }; + * }; + * }; + * }; + * }; + * }; + * */ -static const struct software_node *ov05c10_nodes[] = { - &camera_node, - &ports, - &port_node, - &endpoint_node, - &remote_ep_isp_node, +static const struct software_node *amd_isp4_nodes[] = { + &amd_camera_node, + &isp4_node, + &isp4_ports, + &isp4_port_node, + &isp4_mipi1_endpoint_node, + &i2c1_node, + &ov05c10_camera_node, + &ov05c10_ports, + &ov05c10_port_node, + &ov05c10_endpoint_node, NULL }; @@ -140,7 +242,7 @@ static const struct amdisp_platform_info ov05c10_platform_config = { .dev_name = "ov05c10", I2C_BOARD_INFO("ov05c10", AMDISP_OV05C10_I2C_ADDR), }, - .swnodes = ov05c10_nodes, + .swnodes = amd_isp4_nodes, }; static const struct acpi_device_id amdisp_sensor_ids[] = { @@ -151,7 +253,7 @@ MODULE_DEVICE_TABLE(acpi, amdisp_sensor_ids); static inline bool is_isp_i2c_adapter(struct i2c_adapter *adap) { - return !strcmp(adap->owner->name, "i2c_designware_amdisp"); + return !strcmp(adap->name, AMDISP_I2C_ADAP_NAME); } static void instantiate_isp_i2c_client(struct amdisp_platform *isp4_platform, @@ -232,7 +334,8 @@ static struct amdisp_platform *prepare_amdisp_platform(struct device *dev, if (ret) return ERR_PTR(ret); - isp4_platform->board_info.swnode = src->swnodes[0]; + /* initialize ov05c10_camera_node */ + isp4_platform->board_info.swnode = src->swnodes[6]; return isp4_platform; } @@ -257,6 +360,7 @@ static int amd_isp_probe(struct platform_device *pdev) { const struct amdisp_platform_info *pinfo; struct amdisp_platform *isp4_platform; + struct acpi_device *adev; int ret; 
pinfo = device_get_match_data(&pdev->dev); @@ -274,6 +378,10 @@ static int amd_isp_probe(struct platform_device *pdev) if (ret) goto error_unregister_sw_node; + adev = ACPI_COMPANION(&pdev->dev); + /* initialize root amd_camera_node */ + adev->driver_data = (void *)pinfo->swnodes[0]; + /* check if adapter is already registered and create i2c client instance */ i2c_for_each_dev(isp4_platform, try_to_instantiate_i2c_client); diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 538b36b97095..885e2f8136fd 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -97,7 +97,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms short_sleep = jiffies + msecs_to_jiffies(HSMP_SHORT_SLEEP); timeout = jiffies + msecs_to_jiffies(HSMP_MSG_TIMEOUT); - while (time_before(jiffies, timeout)) { + while (true) { ret = sock->amd_hsmp_rdwr(sock, mbinfo->msg_resp_off, &mbox_status, HSMP_RD); if (ret) { dev_err(sock->dev, "Error %d reading mailbox status\n", ret); @@ -106,6 +106,10 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms if (mbox_status != HSMP_STATUS_NOT_READY) break; + + if (!time_before(jiffies, timeout)) + break; + if (time_before(jiffies, short_sleep)) usleep_range(50, 100); else @@ -210,13 +214,7 @@ int hsmp_send_message(struct hsmp_message *msg) return -ENODEV; sock = &hsmp_pdev.sock[msg->sock_ind]; - /* - * The time taken by smu operation to complete is between - * 10us to 1ms. Sometime it may take more time. 
- * In SMP system timeout of 100 millisecs should - * be enough for the previous thread to finish the operation - */ - ret = down_timeout(&sock->hsmp_sem, msecs_to_jiffies(HSMP_MSG_TIMEOUT)); + ret = down_interruptible(&sock->hsmp_sem); if (ret < 0) return ret; diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 5c7c01f66cde..131f10b68308 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -11,7 +11,7 @@ #include <linux/dmi.h> #include <linux/io.h> #include <linux/ioport.h> -#include <asm/amd/fch.h> +#include <linux/platform_data/x86/amd-fch.h> #include "pmc.h" @@ -225,6 +225,15 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"), } }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */ + { + .ident = "PCSpecialist Lafite Pro V 14M", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lafite Pro V 14M"), + } + }, {} }; diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 37c7a57afee5..0b9b23eb7c2c 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -157,6 +157,8 @@ static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) return -ENOMEM; } + memset_io(dev->smu_virt_addr, 0, sizeof(struct smu_metrics)); + /* Start the logging */ amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, false); amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, false); diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 76910601cac8..ef988605c4da 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -280,7 +280,7 @@ int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer) dev_err(dev->dev, "Invalid CPU id: 0x%x", dev->cpu_id); } - dev->buf = kzalloc(dev->mtable_size, GFP_KERNEL); + 
dev->buf = devm_kzalloc(dev->dev, dev->mtable_size, GFP_KERNEL); if (!dev->buf) return -ENOMEM; } @@ -493,7 +493,6 @@ static void amd_pmf_remove(struct platform_device *pdev) mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); mutex_destroy(&dev->cb_mutex); - kfree(dev->buf); } static const struct attribute_group *amd_pmf_driver_groups[] = { diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index d3bd12ad036a..4f626ebcb619 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -358,30 +358,28 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, return -EINVAL; /* re-alloc to the new buffer length of the policy binary */ - new_policy_buf = memdup_user(buf, length); - if (IS_ERR(new_policy_buf)) - return PTR_ERR(new_policy_buf); + new_policy_buf = devm_kzalloc(dev->dev, length, GFP_KERNEL); + if (!new_policy_buf) + return -ENOMEM; + + if (copy_from_user(new_policy_buf, buf, length)) { + devm_kfree(dev->dev, new_policy_buf); + return -EFAULT; + } - kfree(dev->policy_buf); + devm_kfree(dev->dev, dev->policy_buf); dev->policy_buf = new_policy_buf; dev->policy_sz = length; - if (!amd_pmf_pb_valid(dev)) { - ret = -EINVAL; - goto cleanup; - } + if (!amd_pmf_pb_valid(dev)) + return -EINVAL; amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) - goto cleanup; + return ret; return length; - -cleanup: - kfree(dev->policy_buf); - dev->policy_buf = NULL; - return ret; } static const struct file_operations pb_fops = { @@ -422,12 +420,12 @@ static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_ rc = tee_client_open_session(ctx, &sess_arg, NULL); if (rc < 0 || sess_arg.ret != 0) { pr_err("Failed to open TEE session err:%#x, rc:%d\n", sess_arg.ret, rc); - return rc; + return rc ?: -EINVAL; } *id = sess_arg.session; - return rc; + return 0; } static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) @@ 
-462,7 +460,9 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) dev->tee_ctx = tee_client_open_context(NULL, amd_pmf_amdtee_ta_match, NULL, NULL); if (IS_ERR(dev->tee_ctx)) { dev_err(dev->dev, "Failed to open TEE context\n"); - return PTR_ERR(dev->tee_ctx); + ret = PTR_ERR(dev->tee_ctx); + dev->tee_ctx = NULL; + return ret; } ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); @@ -502,9 +502,12 @@ out_ctx: static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) { + if (!dev->tee_ctx) + return; tee_shm_free(dev->fw_shm_pool); tee_client_close_session(dev->tee_ctx, dev->session_id); tee_client_close_context(dev->tee_ctx); + dev->tee_ctx = NULL; } int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) @@ -527,64 +530,45 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_set_dram_addr(dev, true); if (ret) - goto err_cancel_work; + return ret; dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); - if (IS_ERR(dev->policy_base)) { - ret = PTR_ERR(dev->policy_base); - goto err_free_dram_buf; - } + if (IS_ERR(dev->policy_base)) + return PTR_ERR(dev->policy_base); - dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); - if (!dev->policy_buf) { - ret = -ENOMEM; - goto err_free_dram_buf; - } + dev->policy_buf = devm_kzalloc(dev->dev, dev->policy_sz, GFP_KERNEL); + if (!dev->policy_buf) + return -ENOMEM; memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); if (!amd_pmf_pb_valid(dev)) { dev_info(dev->dev, "No Smart PC policy present\n"); - ret = -EINVAL; - goto err_free_policy; + return -EINVAL; } amd_pmf_hex_dump_pb(dev); - dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) { - ret = -ENOMEM; - goto err_free_policy; - } + dev->prev_data = devm_kzalloc(dev->dev, sizeof(*dev->prev_data), GFP_KERNEL); + if (!dev->prev_data) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - goto 
err_free_prev_data; + return ret; ret = amd_pmf_start_policy_engine(dev); - switch (ret) { - case TA_PMF_TYPE_SUCCESS: - status = true; - break; - case TA_ERROR_CRYPTO_INVALID_PARAM: - case TA_ERROR_CRYPTO_BIN_TOO_LARGE: - amd_pmf_tee_deinit(dev); - status = false; - break; - default: - ret = -EINVAL; - amd_pmf_tee_deinit(dev); - goto err_free_prev_data; - } - + dev_dbg(dev->dev, "start policy engine ret: %d\n", ret); + status = ret == TA_PMF_TYPE_SUCCESS; if (status) break; + amd_pmf_tee_deinit(dev); } if (!status && !pb_side_load) { ret = -EINVAL; - goto err_free_prev_data; + goto err; } if (pb_side_load) @@ -592,22 +576,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_register_input_device(dev); if (ret) - goto err_pmf_remove_pb; + goto err; return 0; -err_pmf_remove_pb: - if (pb_side_load && dev->esbin) - amd_pmf_remove_pb(dev); - amd_pmf_tee_deinit(dev); -err_free_prev_data: - kfree(dev->prev_data); -err_free_policy: - kfree(dev->policy_buf); -err_free_dram_buf: - kfree(dev->buf); -err_cancel_work: - cancel_delayed_work_sync(&dev->pb_work); +err: + amd_pmf_deinit_smart_pc(dev); return ret; } @@ -621,11 +595,5 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev) amd_pmf_remove_pb(dev); cancel_delayed_work_sync(&dev->pb_work); - kfree(dev->prev_data); - dev->prev_data = NULL; - kfree(dev->policy_buf); - dev->policy_buf = NULL; - kfree(dev->buf); - dev->buf = NULL; amd_pmf_tee_deinit(dev); } diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 3f8b2a324efd..f84c3d03c1de 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -530,6 +530,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_zenbook_duo_kbd, }, + { + .callback = dmi_matched, + .ident = "ASUS Zenbook Duo UX8406CA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX8406CA"), + }, + .driver_data = 
&quirk_asus_zenbook_duo_kbd, + }, {}, }; diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index c42f9228b0b2..20ec122a9fe0 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -119,7 +119,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), }, - .driver_data = &g_series_quirks, + .driver_data = &generic_quirks, }, { .ident = "Alienware m16 R2", diff --git a/drivers/platform/x86/dell/dell-lis3lv02d.c b/drivers/platform/x86/dell/dell-lis3lv02d.c index efe26d667973..0791118dd6b7 100644 --- a/drivers/platform/x86/dell/dell-lis3lv02d.c +++ b/drivers/platform/x86/dell/dell-lis3lv02d.c @@ -45,6 +45,7 @@ static const struct dmi_system_id lis3lv02d_devices[] __initconst = { * Additional individual entries were added after verification. */ DELL_LIS3LV02D_DMI_ENTRY("Latitude 5480", 0x29), + DELL_LIS3LV02D_DMI_ENTRY("Latitude 5500", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6330", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Latitude E6430", 0x29), DELL_LIS3LV02D_DMI_ENTRY("Precision 3540", 0x29), diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h index 3ad33a094588..817ee7ba07ca 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h +++ b/drivers/platform/x86/dell/dell-wmi-sysman/dell-wmi-sysman.h @@ -89,6 +89,11 @@ extern struct wmi_sysman_priv wmi_priv; enum { ENUM, INT, STR, PO }; +#define ENUM_MIN_ELEMENTS 8 +#define INT_MIN_ELEMENTS 9 +#define STR_MIN_ELEMENTS 8 +#define PO_MIN_ELEMENTS 4 + enum { ATTR_NAME, DISPL_NAME_LANG_CODE, diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c index 8cc212c85266..fc2f58b4cbc6 100644 --- 
a/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c @@ -23,9 +23,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < ENUM_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c index 951e75b538fa..735248064239 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < INT_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[CURRENT_VAL].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index d8f1bf5e58a0..3167e06d416e 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -26,9 +26,10 @@ static ssize_t is_enabled_show(struct kobject *kobj, struct kobj_attribute *attr obj = 
get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < PO_MIN_ELEMENTS || + obj->package.elements[IS_PASS_SET].type != ACPI_TYPE_INTEGER) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%lld\n", obj->package.elements[IS_PASS_SET].integer.value); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c index c392f0ecf8b5..0d2c74f8d1aa 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c @@ -25,9 +25,10 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a obj = get_wmiobj_pointer(instance_id, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID); if (!obj) return -EIO; - if (obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < STR_MIN_ELEMENTS || + obj->package.elements[CURRENT_VAL].type != ACPI_TYPE_STRING) { kfree(obj); - return -EINVAL; + return -EIO; } ret = snprintf(buf, PAGE_SIZE, "%s\n", obj->package.elements[CURRENT_VAL].string.pointer); kfree(obj); diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c index d00389b860e4..f5402b714657 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c @@ -407,10 +407,10 @@ static int init_bios_attributes(int attr_type, const char *guid) return retval; switch (attr_type) { - case ENUM: min_elements = 8; break; - case INT: min_elements = 9; break; - case STR: min_elements = 8; break; - case PO: min_elements = 4; break; + case ENUM: min_elements = ENUM_MIN_ELEMENTS; break; + case INT: min_elements = INT_MIN_ELEMENTS; break; + case STR: 
min_elements = STR_MIN_ELEMENTS; break; + case PO: min_elements = PO_MIN_ELEMENTS; break; default: pr_err("Error: Unknown attr_type: %d\n", attr_type); return -EINVAL; @@ -597,7 +597,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); err_exit_bios_attr_pass_interface: exit_bios_attr_pass_interface(); @@ -611,7 +611,7 @@ err_exit_bios_attr_set_interface: static void __exit sysman_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(wmi_priv.class_dev); exit_bios_attr_set_interface(); exit_bios_attr_pass_interface(); } diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index e30ca325938c..9dd9f2cb074f 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -45,7 +45,7 @@ MODULE_AUTHOR("Abhay Salunke <abhay_salunke@dell.com>"); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); MODULE_LICENSE("GPL"); -MODULE_VERSION("3.2"); +MODULE_VERSION("3.3"); #define BIOS_SCAN_LIMIT 0xffffffff #define MAX_IMAGE_LENGTH 16 @@ -91,7 +91,7 @@ static void init_packet_head(void) rbu_data.imagesize = 0; } -static int create_packet(void *data, size_t length) +static int create_packet(void *data, size_t length) __must_hold(&rbu_data.lock) { struct packet_data *newpacket; int ordernum = 0; @@ -292,7 +292,7 @@ static int packet_read_list(char *data, size_t * pread_length) remaining_bytes = *pread_length; bytes_read = rbu_data.packet_read_count; - list_for_each_entry(newpacket, (&packet_data_head.list)->next, list) { + list_for_each_entry(newpacket, &packet_data_head.list, list) { bytes_copied = do_packet_read(pdest, newpacket, remaining_bytes, bytes_read, &temp_count); remaining_bytes -= bytes_copied; @@ -315,14 +315,14 @@ static void packet_empty_list(void) { struct packet_data *newpacket, *tmp; - 
list_for_each_entry_safe(newpacket, tmp, (&packet_data_head.list)->next, list) { + list_for_each_entry_safe(newpacket, tmp, &packet_data_head.list, list) { list_del(&newpacket->list); /* * zero out the RBU packet memory before freeing * to make sure there are no stale RBU packets left in memory */ - memset(newpacket->data, 0, rbu_data.packetsize); + memset(newpacket->data, 0, newpacket->length); set_memory_wb((unsigned long)newpacket->data, 1 << newpacket->ordernum); free_pages((unsigned long) newpacket->data, diff --git a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c index 13237890fc92..5bfa7159f5bc 100644 --- a/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c +++ b/drivers/platform/x86/hp/hp-bioscfg/bioscfg.c @@ -1034,7 +1034,7 @@ err_release_attributes_data: release_attributes_data(); err_destroy_classdev: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); err_unregister_class: hp_exit_attr_set_interface(); @@ -1045,7 +1045,7 @@ err_unregister_class: static void __exit hp_exit(void) { release_attributes_data(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(bioscfg_drv.class_dev); hp_exit_attr_set_interface(); } diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index ede483573fe0..b5e4da6a6779 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -15,6 +15,7 @@ #include <linux/bug.h> #include <linux/cleanup.h> #include <linux/debugfs.h> +#include <linux/delay.h> #include <linux/device.h> #include <linux/dmi.h> #include <linux/i8042.h> @@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv) */ #define IDEAPAD_EC_TIMEOUT 200 /* in ms */ +/* + * Some models (e.g., ThinkBook since 2024) have a low tolerance for being + * polled too frequently. Doing so may break the state machine in the EC, + * resulting in a hard shutdown. 
+ * + * It is also observed that frequent polls may disturb the ongoing operation + * and notably delay the availability of EC response. + * + * These values are used as the delay before the first poll and the interval + * between subsequent polls to solve the above issues. + */ +#define IDEAPAD_EC_POLL_MIN_US 150 +#define IDEAPAD_EC_POLL_MAX_US 300 + static int eval_int(acpi_handle handle, const char *name, unsigned long *res) { unsigned long long result; @@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) @@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 0b5e43444ed6..f25a427cccda 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -54,6 +54,7 @@ static const struct acpi_device_id intel_hid_ids[] = { { "INTC107B" }, { "INTC10CB" }, { "INTC10CC" }, + { "INTC10F1" }, { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index e136d18b1d38..4a94a4ee031e 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -299,6 +299,13 @@ enum ppfear_regs { #define PTL_PCD_PMC_MMIO_REG_LEN 0x31A8 /* SSRAM PMC Device ID */ +/* LNL */ +#define PMC_DEVID_LNL_SOCM 0xa87f + +/* PTL */ +#define PMC_DEVID_PTL_PCDH 0xe37f +#define PMC_DEVID_PTL_PCDP 0xe47f + /* ARL */ #define 
PMC_DEVID_ARL_SOCM 0x777f #define PMC_DEVID_ARL_SOCS 0xae7f diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c index b207247eb5dd..93579152188e 100644 --- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c +++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c @@ -187,6 +187,9 @@ static const struct pci_device_id intel_pmc_ssram_telemetry_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_MTL_SOCM) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_LNL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDH) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDP) }, { } }; MODULE_DEVICE_TABLE(pci, intel_pmc_ssram_telemetry_pci_ids); diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c index 0c5c88eb7baf..9d8247bb9cfa 100644 --- a/drivers/platform/x86/intel/tpmi_power_domains.c +++ b/drivers/platform/x86/intel/tpmi_power_domains.c @@ -228,8 +228,10 @@ static int __init tpmi_init(void) domain_die_map = kcalloc(size_mul(topology_max_packages(), MAX_POWER_DOMAINS), sizeof(*domain_die_map), GFP_KERNEL); - if (!domain_die_map) + if (!domain_die_map) { + ret = -ENOMEM; goto free_domain_mask; + } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "platform/x86/tpmi_power_domains:online", diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 0f8aea18275b..65897fae17df 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -58,7 +58,7 @@ static ssize_t show_agent_types(struct kobject *kobj, struct kobj_attribute *att if (length) length += sysfs_emit_at(buf, length, " "); - length += sysfs_emit_at(buf, length, 
agent_name[agent]); + length += sysfs_emit_at(buf, length, "%s", agent_name[agent]); } length += sysfs_emit_at(buf, length, "\n"); diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 1c7b2f2716ca..44d9948ed224 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -511,10 +511,13 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ /* Get the package ID from the TPMI core */ plat_info = tpmi_get_platform_data(auxdev); - if (plat_info) - pkg = plat_info->package_id; - else + if (unlikely(!plat_info)) { dev_info(&auxdev->dev, "Platform information is NULL\n"); + ret = -ENODEV; + goto err_rem_common; + } + + pkg = plat_info->package_id; for (i = 0; i < num_resources; ++i) { struct tpmi_uncore_power_domain_info *pd_info; diff --git a/drivers/platform/x86/portwell-ec.c b/drivers/platform/x86/portwell-ec.c index 8b788822237b..3e019c51913e 100644 --- a/drivers/platform/x86/portwell-ec.c +++ b/drivers/platform/x86/portwell-ec.c @@ -236,6 +236,7 @@ static int pwec_probe(struct platform_device *pdev) return ret; } + ec_wdt_dev.parent = &pdev->dev; ret = devm_watchdog_register_device(&pdev->dev, &ec_wdt_dev); if (ret < 0) { dev_err(&pdev->dev, "failed to register Portwell EC Watchdog\n"); diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c index 5878a351993e..3c13e13d4885 100644 --- a/drivers/platform/x86/samsung-galaxybook.c +++ b/drivers/platform/x86/samsung-galaxybook.c @@ -1403,6 +1403,7 @@ static int galaxybook_probe(struct platform_device *pdev) } static const struct acpi_device_id galaxybook_device_ids[] = { + { "SAM0426" }, { "SAM0427" }, { "SAM0428" }, { "SAM0429" }, diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 00b1e7c79a3d..b73b84fdb15e 100644 
--- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -973,6 +973,7 @@ static const struct attribute_group auth_attr_group = { .is_visible = auth_attr_is_visible, .attrs = auth_attrs, }; +__ATTRIBUTE_GROUPS(auth_attr); /* ---- Attributes sysfs --------------------------------------------------------- */ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1188,6 +1189,7 @@ static const struct attribute_group tlmi_attr_group = { .is_visible = attr_is_visible, .attrs = tlmi_attrs, }; +__ATTRIBUTE_GROUPS(tlmi_attr); static void tlmi_attr_setting_release(struct kobject *kobj) { @@ -1207,11 +1209,13 @@ static void tlmi_pwd_setting_release(struct kobject *kobj) static const struct kobj_type tlmi_attr_setting_ktype = { .release = &tlmi_attr_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = tlmi_attr_groups, }; static const struct kobj_type tlmi_pwd_setting_ktype = { .release = &tlmi_pwd_setting_release, .sysfs_ops = &kobj_sysfs_ops, + .default_groups = auth_attr_groups, }; static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -1380,21 +1384,18 @@ static struct kobj_attribute debug_cmd = __ATTR_WO(debug_cmd); /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { - int i; + struct kobject *pos, *n; /* Attribute structures */ - for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { - if (tlmi_priv.setting[i]) { - sysfs_remove_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); - kobject_put(&tlmi_priv.setting[i]->kobj); - } - } sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &save_settings.attr); if (tlmi_priv.can_debug_cmd && debug_support) sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &debug_cmd.attr); + list_for_each_entry_safe(pos, n, &tlmi_priv.attribute_kset->list, entry) + kobject_put(pos); + 
kset_unregister(tlmi_priv.attribute_kset); /* Free up any saved signatures */ @@ -1402,19 +1403,8 @@ static void tlmi_release_attr(void) kfree(tlmi_priv.pwd_admin->save_signature); /* Authentication structures */ - sysfs_remove_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_admin->kobj); - sysfs_remove_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_power->kobj); - - if (tlmi_priv.opcode_support) { - sysfs_remove_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_system->kobj); - sysfs_remove_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_hdd->kobj); - sysfs_remove_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); - kobject_put(&tlmi_priv.pwd_nvme->kobj); - } + list_for_each_entry_safe(pos, n, &tlmi_priv.authentication_kset->list, entry) + kobject_put(pos); kset_unregister(tlmi_priv.authentication_kset); } @@ -1455,6 +1445,14 @@ static int tlmi_sysfs_init(void) goto fail_device_created; } + tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, + &tlmi_priv.class_dev->kobj); + if (!tlmi_priv.authentication_kset) { + kset_unregister(tlmi_priv.attribute_kset); + ret = -ENOMEM; + goto fail_device_created; + } + for (i = 0; i < TLMI_SETTINGS_COUNT; i++) { /* Check if index is a valid setting - skip if it isn't */ if (!tlmi_priv.setting[i]) @@ -1471,12 +1469,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, - "%s", tlmi_priv.setting[i]->display_name); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.setting[i]->kobj, &tlmi_attr_group); + ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, + NULL, "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; } @@ -1496,55 +1490,34 @@ static int tlmi_sysfs_init(void) } /* 
Create authentication entries */ - tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, - &tlmi_priv.class_dev->kobj); - if (!tlmi_priv.authentication_kset) { - ret = -ENOMEM; - goto fail_create_attr; - } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_admin->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Admin"); if (ret) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "Power-on"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_power->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "Power-on"); if (ret) goto fail_create_attr; if (tlmi_priv.opcode_support) { tlmi_priv.pwd_system->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_system->kobj, NULL, "%s", "System"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_system->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_system->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "System"); if (ret) goto fail_create_attr; tlmi_priv.pwd_hdd->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_hdd->kobj, NULL, "%s", "HDD"); - if (ret) - goto fail_create_attr; - - ret = sysfs_create_group(&tlmi_priv.pwd_hdd->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_hdd->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "HDD"); if (ret) goto fail_create_attr; tlmi_priv.pwd_nvme->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_add(&tlmi_priv.pwd_nvme->kobj, NULL, "%s", "NVMe"); - if (ret) - goto fail_create_attr; - - ret = 
sysfs_create_group(&tlmi_priv.pwd_nvme->kobj, &auth_attr_group); + ret = kobject_init_and_add(&tlmi_priv.pwd_nvme->kobj, &tlmi_pwd_setting_ktype, + NULL, "%s", "NVMe"); if (ret) goto fail_create_attr; } @@ -1554,7 +1527,7 @@ static int tlmi_sysfs_init(void) fail_create_attr: tlmi_release_attr(); fail_device_created: - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); fail_class_created: return ret; } @@ -1577,8 +1550,6 @@ static struct tlmi_pwd_setting *tlmi_create_auth(const char *pwd_type, new_pwd->maxlen = tlmi_priv.pwdcfg.core.max_length; new_pwd->index = 0; - kobject_init(&new_pwd->kobj, &tlmi_pwd_setting_ktype); - return new_pwd; } @@ -1683,7 +1654,6 @@ static int tlmi_analyze(struct wmi_device *wdev) if (setting->possible_values) strreplace(setting->possible_values, ',', ';'); - kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; kfree(item); } @@ -1781,7 +1751,7 @@ fail_clear_attr: static void tlmi_remove(struct wmi_device *wdev) { tlmi_release_attr(); - device_destroy(&firmware_attributes_class, MKDEV(0, 0)); + device_unregister(tlmi_priv.class_dev); } static int tlmi_probe(struct wmi_device *wdev, const void *context) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e7350c9fa3aa..b59b4d90b0c7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3295,6 +3295,7 @@ static const struct key_entry keymap_lenovo[] __initconst = { */ { KE_KEY, 0x131d, { KEY_VENDOR } }, /* System debug info, similar to old ThinkPad key */ { KE_KEY, 0x1320, { KEY_LINK_PHONE } }, + { KE_KEY, 0x1402, { KEY_LINK_PHONE } }, { KE_KEY, TP_HKEY_EV_TRACK_DOUBLETAP /* 0x8036 */, { KEY_PROG4 } }, { KE_END } }; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index e46453750d5f..03aecf8bb7f8 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -177,16 +177,22 @@ static int 
wmi_device_enable(struct wmi_device *wdev, bool enable) acpi_handle handle; acpi_status status; - if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) - return 0; - if (wblock->dev.dev.type == &wmi_type_method) return 0; - if (wblock->dev.dev.type == &wmi_type_event) + if (wblock->dev.dev.type == &wmi_type_event) { + /* + * Windows always enables/disables WMI events, even when they are + * not marked as being expensive. We follow this behavior for + * compatibility reasons. + */ snprintf(method, sizeof(method), "WE%02X", wblock->gblock.notify_id); - else + } else { + if (!(wblock->gblock.flags & ACPI_WMI_EXPENSIVE)) + return 0; + get_acpi_method_name(wblock, 'C', method); + } /* * Not all WMI devices marked as expensive actually implement the diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index e3be40adc0d7..faa0b6bc5b53 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -341,12 +341,28 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode) { struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); struct rapl_defaults *defaults = get_defaults(rd->rp); + u64 val; int ret; cpus_read_lock(); ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode); - if (!ret && defaults->set_floor_freq) + if (ret) + goto end; + + ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val); + if (ret) + goto end; + + if (mode != val) { + pr_debug("%s cannot be %s\n", power_zone->name, + str_enabled_disabled(mode)); + goto end; + } + + if (defaults->set_floor_freq) defaults->set_floor_freq(rd, mode); + +end: cpus_read_unlock(); return ret; diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 35a5994bf64f..36f57d7b4a66 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -121,7 +121,8 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) struct ptp_clock_info *ops; int err = -EOPNOTSUPP; - if 
(ptp_clock_freerun(ptp)) { + if (tx->modes & (ADJ_SETOFFSET | ADJ_FREQUENCY | ADJ_OFFSET) && + ptp_clock_freerun(ptp)) { pr_err("ptp: physical clock is free running\n"); return -EBUSY; } diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 528d86a33f37..a6aad743c282 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,7 +98,27 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - return !ptp->is_virtual_clock; + bool in_use = false; + + /* Virtual clocks can't be stacked on top of virtual clocks. + * Avoid acquiring the n_vclocks_mux on virtual clocks, to allow this + * function to be called from code paths where the n_vclocks_mux of the + * parent physical clock is already held. Functionally that's not an + * issue, but lockdep would complain, because they have the same lock + * class. + */ + if (ptp->is_virtual_clock) + return false; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return true; + + if (ptp->n_vclocks) + in_use = true; + + mutex_unlock(&ptp->n_vclocks_mux); + + return in_use; } /* Check if ptp clock shall be free running */ diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 4d842c692194..edf776b8ad53 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -596,7 +596,7 @@ static bool pwm_state_valid(const struct pwm_state *state) * and supposed to be ignored. So also ignore any strange values and * consider the state ok. 
*/ - if (state->enabled) + if (!state->enabled) return true; if (!state->period) diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 7eaab5831499..33d3554b9197 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -130,8 +130,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, return ret; clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); - if (!clk_rate) - return -EINVAL; + if (!clk_rate) { + ret = -EINVAL; + goto out; + } /* Make sure we use the bus clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) @@ -150,9 +152,9 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, } if (clkdiv > PWM_CLK_DIV_MAX) { - pwm_mediatek_clk_disable(chip, pwm); dev_err(pwmchip_parent(chip), "period of %d ns not supported\n", period_ns); - return -EINVAL; + ret = -EINVAL; + goto out; } if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) { @@ -169,9 +171,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, pwm_mediatek_writel(pc, pwm->hwpwm, reg_width, cnt_period); pwm_mediatek_writel(pc, pwm->hwpwm, reg_thres, cnt_duty); +out: pwm_mediatek_clk_disable(chip, pwm); - return 0; + return ret; } static int pwm_mediatek_enable(struct pwm_chip *chip, struct pwm_device *pwm) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7a248dc8d2e2..cbd6d53ebfb5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5639,6 +5639,7 @@ static void regulator_remove_coupling(struct regulator_dev *rdev) ERR_PTR(err)); } + rdev->coupling_desc.n_coupled = 0; kfree(rdev->coupling_desc.coupled_rdevs); rdev->coupling_desc.coupled_rdevs = NULL; } diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index bd9447dac596..c282236959b1 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -147,6 +147,7 @@ struct fan53555_device_info { unsigned int slew_mask; const unsigned int *ramp_delay_table; 
unsigned int n_ramp_values; + unsigned int enable_time; unsigned int slew_rate; }; @@ -282,6 +283,7 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 250; di->vsel_count = FAN53526_NVOLTAGES; return 0; @@ -296,10 +298,12 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_REV_00: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 400; break; case FAN53555_CHIP_REV_13: di->vsel_min = 800000; di->vsel_step = 10000; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -311,13 +315,19 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_ID_01: case FAN53555_CHIP_ID_03: case FAN53555_CHIP_ID_05: + di->vsel_min = 600000; + di->vsel_step = 10000; + di->enable_time = 400; + break; case FAN53555_CHIP_ID_08: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 175; break; case FAN53555_CHIP_ID_04: di->vsel_min = 603000; di->vsel_step = 12826; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -350,6 +360,7 @@ static int fan53555_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = FAN53555_NVOLTAGES; return 0; @@ -372,6 +383,7 @@ static int rk8602_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = RK8602_NVOLTAGES; return 0; @@ -395,6 +407,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 400; di->vsel_count = 
FAN53555_NVOLTAGES; return 0; @@ -594,6 +607,7 @@ static int fan53555_regulator_register(struct fan53555_device_info *di, rdesc->ramp_mask = di->slew_mask; rdesc->ramp_delay_table = di->ramp_delay_table; rdesc->n_ramp_values = di->n_ramp_values; + rdesc->enable_time = di->enable_time; rdesc->owner = THIS_MODULE; rdev = devm_regulator_register(di->dev, &di->desc, config); diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 75bd53445ba7..6351ceefdb3e 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -260,8 +260,10 @@ static int gpio_regulator_probe(struct platform_device *pdev) return -ENOMEM; } - drvdata->gpiods = devm_kzalloc(dev, sizeof(struct gpio_desc *), - GFP_KERNEL); + drvdata->gpiods = devm_kcalloc(dev, config->ngpios, + sizeof(struct gpio_desc *), GFP_KERNEL); + if (!drvdata->gpiods) + return -ENOMEM; if (config->input_supply) { drvdata->desc.supply_name = devm_kstrdup(&pdev->dev, @@ -274,8 +276,6 @@ static int gpio_regulator_probe(struct platform_device *pdev) } } - if (!drvdata->gpiods) - return -ENOMEM; for (i = 0; i < config->ngpios; i++) { drvdata->gpiods[i] = devm_gpiod_get_index(dev, NULL, diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c index 48dcee5287f3..9ad16b04c913 100644 --- a/drivers/regulator/mp886x.c +++ b/drivers/regulator/mp886x.c @@ -348,7 +348,8 @@ static const struct of_device_id mp886x_dt_ids[] = { MODULE_DEVICE_TABLE(of, mp886x_dt_ids); static const struct i2c_device_id mp886x_id[] = { - { "mp886x", (kernel_ulong_t)&mp8869_ci }, + { "mp8867", (kernel_ulong_t)&mp8867_ci }, + { "mp8869", (kernel_ulong_t)&mp8869_ci }, { }, }; MODULE_DEVICE_TABLE(i2c, mp886x_id); diff --git a/drivers/regulator/sy8824x.c b/drivers/regulator/sy8824x.c index c05b67e26ac8..5bec84db25f1 100644 --- a/drivers/regulator/sy8824x.c +++ b/drivers/regulator/sy8824x.c @@ -213,7 +213,10 @@ static const struct of_device_id sy8824_dt_ids[] = { MODULE_DEVICE_TABLE(of, 
sy8824_dt_ids); static const struct i2c_device_id sy8824_id[] = { - { "sy8824", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824c", (kernel_ulong_t)&sy8824c_cfg }, + { "sy8824e", (kernel_ulong_t)&sy8824e_cfg }, + { "sy20276", (kernel_ulong_t)&sy20276_cfg }, + { "sy20278", (kernel_ulong_t)&sy20278_cfg }, { } }; MODULE_DEVICE_TABLE(i2c, sy8824_id); diff --git a/drivers/regulator/tps65219-regulator.c b/drivers/regulator/tps65219-regulator.c index b16b300d7f45..5e67fdc88f49 100644 --- a/drivers/regulator/tps65219-regulator.c +++ b/drivers/regulator/tps65219-regulator.c @@ -436,46 +436,46 @@ static int tps65219_regulator_probe(struct platform_device *pdev) pmic->rdesc[i].name); } - irq_data = devm_kmalloc(tps->dev, pmic->common_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->common_irq_size; ++i) { irq_type = &pmic->common_irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", irq_type->irq_name, irq, error); } - irq_data = devm_kmalloc(tps->dev, pmic->dev_irq_size, GFP_KERNEL); - if (!irq_data) - return -ENOMEM; - for (i = 0; i < pmic->dev_irq_size; ++i) { irq_type = &pmic->irq_types[i]; irq = platform_get_irq_byname(pdev, irq_type->irq_name); if (irq < 0) return -EINVAL; - irq_data[i].dev = tps->dev; - irq_data[i].type = irq_type; + irq_data = devm_kmalloc(tps->dev, sizeof(*irq_data), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; + + irq_data->dev = tps->dev; + irq_data->type = irq_type; error = devm_request_threaded_irq(tps->dev, 
irq, NULL, tps65219_regulator_irq_handler, IRQF_ONESHOT, irq_type->irq_name, - &irq_data[i]); + irq_data); if (error) return dev_err_probe(tps->dev, PTR_ERR(rdev), "Failed to request %s IRQ %d: %d\n", diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 8172869bd3d7..0743c6acd6e2 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -692,8 +692,12 @@ static irqreturn_t cmos_interrupt(int irq, void *p) { u8 irqstat; u8 rtc_control; + unsigned long flags; - spin_lock(&rtc_lock); + /* We cannot use spin_lock() here, as cmos_interrupt() is also called + * in a non-irq context. + */ + spin_lock_irqsave(&rtc_lock, flags); /* When the HPET interrupt handler calls us, the interrupt * status is passed as arg1 instead of the irq number. But @@ -727,7 +731,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) hpet_mask_rtc_irq_bit(RTC_AIE); CMOS_READ(RTC_INTR_FLAGS); } - spin_unlock(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); if (is_intr(irqstat)) { rtc_update_irq(p, 1, irqstat); @@ -1295,9 +1299,7 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { - local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); - local_irq_enable(); return; } diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 31c7dca8f469..2e1ac0c42e93 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -1538,7 +1538,12 @@ static int pcf2127_spi_probe(struct spi_device *spi) variant = &pcf21xx_cfg[type]; } - config.max_register = variant->max_register, + if (variant->type == PCF2131) { + config.read_flag_mask = 0x0; + config.write_flag_mask = 0x0; + } + + config.max_register = variant->max_register; regmap = devm_regmap_init_spi(spi, &config); if (IS_ERR(regmap)) { diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index db5c9b641277..a7220b4d0e8d 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -10,6 +10,7 @@ 
#include <linux/module.h> #include <linux/i2c.h> #include <linux/bcd.h> +#include <linux/reboot.h> #include <linux/regmap.h> #include <linux/rtc.h> #include <linux/platform_device.h> @@ -53,6 +54,7 @@ enum { * Device | Write time | Read time | Write alarm * ================================================= * S5M8767 | UDR + TIME | | UDR + * S2MPG10 | WUDR | RUDR | AUDR * S2MPS11/14 | WUDR | RUDR | WUDR + RUDR * S2MPS13 | WUDR | RUDR | WUDR + AUDR * S2MPS15 | WUDR | RUDR | AUDR @@ -99,6 +101,20 @@ static const struct s5m_rtc_reg_config s5m_rtc_regs = { .write_alarm_udr_mask = S5M_RTC_UDR_MASK, }; +/* Register map for S2MPG10 */ +static const struct s5m_rtc_reg_config s2mpg10_rtc_regs = { + .regs_count = 7, + .time = S2MPG10_RTC_SEC, + .ctrl = S2MPG10_RTC_CTRL, + .alarm0 = S2MPG10_RTC_A0SEC, + .alarm1 = S2MPG10_RTC_A1SEC, + .udr_update = S2MPG10_RTC_UPDATE, + .autoclear_udr_mask = S2MPS15_RTC_WUDR_MASK | S2MPS15_RTC_AUDR_MASK, + .read_time_udr_mask = S2MPS_RTC_RUDR_MASK, + .write_time_udr_mask = S2MPS15_RTC_WUDR_MASK, + .write_alarm_udr_mask = S2MPS15_RTC_AUDR_MASK, +}; + /* Register map for S2MPS13 */ static const struct s5m_rtc_reg_config s2mps13_rtc_regs = { .regs_count = 7, @@ -227,8 +243,8 @@ static int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info) return ret; } -static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, - struct rtc_wkalrm *alarm) +static int s5m_check_pending_alarm_interrupt(struct s5m_rtc_info *info, + struct rtc_wkalrm *alarm) { int ret; unsigned int val; @@ -238,6 +254,7 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val); val &= S5M_ALARM0_STATUS; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -262,17 +279,9 @@ static int s5m_check_peding_alarm_interrupt(struct s5m_rtc_info *info, static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; - ret = regmap_read(info->regmap, 
info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "failed to read update reg(%d)\n", ret); - return ret; - } - - data |= info->regs->write_time_udr_mask; - - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + info->regs->write_time_udr_mask); if (ret < 0) { dev_err(info->dev, "failed to write update reg(%d)\n", ret); return ret; @@ -286,20 +295,14 @@ static int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info) static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) { int ret; - unsigned int data; + unsigned int udr_mask; - ret = regmap_read(info->regmap, info->regs->udr_update, &data); - if (ret < 0) { - dev_err(info->dev, "%s: fail to read update reg(%d)\n", - __func__, ret); - return ret; - } - - data |= info->regs->write_alarm_udr_mask; + udr_mask = info->regs->write_alarm_udr_mask; switch (info->device_type) { case S5M8767X: - data &= ~S5M_RTC_TIME_EN_MASK; + udr_mask |= S5M_RTC_TIME_EN_MASK; break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -309,7 +312,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) return -EINVAL; } - ret = regmap_write(info->regmap, info->regs->udr_update, data); + ret = regmap_update_bits(info->regmap, info->regs->udr_update, + udr_mask, info->regs->write_alarm_udr_mask); if (ret < 0) { dev_err(info->dev, "%s: fail to write update reg(%d)\n", __func__, ret); @@ -320,8 +324,8 @@ static int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info) /* On S2MPS13 the AUDR is not auto-cleared */ if (info->device_type == S2MPS13X) - regmap_update_bits(info->regmap, info->regs->udr_update, - S2MPS13_RTC_AUDR_MASK, 0); + regmap_clear_bits(info->regmap, info->regs->udr_update, + S2MPS13_RTC_AUDR_MASK); return ret; } @@ -333,10 +337,8 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) int ret; if (info->regs->read_time_udr_mask) { - ret = regmap_update_bits(info->regmap, - 
info->regs->udr_update, - info->regs->read_time_udr_mask, - info->regs->read_time_udr_mask); + ret = regmap_set_bits(info->regmap, info->regs->udr_update, + info->regs->read_time_udr_mask); if (ret) { dev_err(dev, "Failed to prepare registers for time reading: %d\n", @@ -351,6 +353,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -374,6 +377,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -411,6 +415,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -430,7 +435,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) dev_dbg(dev, "%s: %ptR(%d)\n", __func__, &alrm->time, alrm->time.tm_wday); - return s5m_check_peding_alarm_interrupt(info, alrm); + return s5m_check_pending_alarm_interrupt(info, alrm); } static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) @@ -449,6 +454,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -487,6 +493,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -524,6 +531,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) switch (info->device_type) { case S5M8767X: + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -604,6 +612,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2); break; + case S2MPG10: case S2MPS15X: case S2MPS14X: case S2MPS13X: @@ -634,59 +643,92 @@ 
static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info) return ret; } +static int s5m_rtc_restart_s2mpg10(struct sys_off_data *data) +{ + struct s5m_rtc_info *info = data->cb_data; + int ret; + + if (data->mode != REBOOT_COLD && data->mode != REBOOT_HARD) + return NOTIFY_DONE; + + /* + * Arm watchdog with maximum timeout (2 seconds), and perform full reset + * on expiry. + */ + ret = regmap_set_bits(info->regmap, S2MPG10_RTC_WTSR, + (S2MPG10_WTSR_COLDTIMER | S2MPG10_WTSR_COLDRST + | S2MPG10_WTSR_WTSRT | S2MPG10_WTSR_WTSR_EN)); + + return ret ? NOTIFY_BAD : NOTIFY_DONE; +} + static int s5m_rtc_probe(struct platform_device *pdev) { struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent); + enum sec_device_type device_type = + platform_get_device_id(pdev)->driver_data; struct s5m_rtc_info *info; - struct i2c_client *i2c; - const struct regmap_config *regmap_cfg; int ret, alarm_irq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; - switch (platform_get_device_id(pdev)->driver_data) { - case S2MPS15X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps15_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS14X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps14_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S2MPS13X: - regmap_cfg = &s2mps14_rtc_regmap_config; - info->regs = &s2mps13_rtc_regs; - alarm_irq = S2MPS14_IRQ_RTCA0; - break; - case S5M8767X: - regmap_cfg = &s5m_rtc_regmap_config; - info->regs = &s5m_rtc_regs; - alarm_irq = S5M8767_IRQ_RTCA1; - break; - default: - return dev_err_probe(&pdev->dev, -ENODEV, - "Device type %lu is not supported by RTC driver\n", - platform_get_device_id(pdev)->driver_data); - } + info->regmap = dev_get_regmap(pdev->dev.parent, "rtc"); + if (!info->regmap) { + const struct regmap_config *regmap_cfg; + struct i2c_client *i2c; - i2c = devm_i2c_new_dummy_device(&pdev->dev, s5m87xx->i2c->adapter, - RTC_I2C_ADDR); - if (IS_ERR(i2c)) - 
return dev_err_probe(&pdev->dev, PTR_ERR(i2c), - "Failed to allocate I2C for RTC\n"); + switch (device_type) { + case S2MPS15X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps15_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS14X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps14_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S2MPS13X: + regmap_cfg = &s2mps14_rtc_regmap_config; + info->regs = &s2mps13_rtc_regs; + alarm_irq = S2MPS14_IRQ_RTCA0; + break; + case S5M8767X: + regmap_cfg = &s5m_rtc_regmap_config; + info->regs = &s5m_rtc_regs; + alarm_irq = S5M8767_IRQ_RTCA1; + break; + default: + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } - info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); - if (IS_ERR(info->regmap)) - return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), - "Failed to allocate RTC register map\n"); + i2c = devm_i2c_new_dummy_device(&pdev->dev, + s5m87xx->i2c->adapter, + RTC_I2C_ADDR); + if (IS_ERR(i2c)) + return dev_err_probe(&pdev->dev, PTR_ERR(i2c), + "Failed to allocate I2C\n"); + + info->regmap = devm_regmap_init_i2c(i2c, regmap_cfg); + if (IS_ERR(info->regmap)) + return dev_err_probe(&pdev->dev, PTR_ERR(info->regmap), + "Failed to allocate regmap\n"); + } else if (device_type == S2MPG10) { + info->regs = &s2mpg10_rtc_regs; + alarm_irq = S2MPG10_IRQ_RTCA0; + } else { + return dev_err_probe(&pdev->dev, -ENODEV, + "Unsupported device type %d\n", + device_type); + } info->dev = &pdev->dev; info->s5m87xx = s5m87xx; - info->device_type = platform_get_device_id(pdev)->driver_data; + info->device_type = device_type; if (s5m87xx->irq_data) { info->irq = regmap_irq_get_virq(s5m87xx->irq_data, alarm_irq); @@ -721,7 +763,23 @@ static int s5m_rtc_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, ret, "Failed to request alarm IRQ %d\n", info->irq); - device_init_wakeup(&pdev->dev, true); + + ret = 
devm_device_init_wakeup(&pdev->dev); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "Failed to init wakeup\n"); + } + + if (of_device_is_system_power_controller(pdev->dev.parent->of_node) && + info->device_type == S2MPG10) { + ret = devm_register_sys_off_handler(&pdev->dev, + SYS_OFF_MODE_RESTART, + SYS_OFF_PRIO_HIGH + 1, + s5m_rtc_restart_s2mpg10, + info); + if (ret) + return dev_err_probe(&pdev->dev, ret, + "Failed to register restart handler\n"); } return devm_rtc_register_device(info->rtc_dev); @@ -755,6 +813,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume); static const struct platform_device_id s5m_rtc_id[] = { { "s5m-rtc", S5M8767X }, + { "s2mpg10-rtc", S2MPG10 }, { "s2mps13-rtc", S2MPS13X }, { "s2mps14-rtc", S2MPS14X }, { "s2mps15-rtc", S2MPS15X }, diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index cef60770f68b..b3fcdcae379e 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -86,7 +86,7 @@ static void *_copy_apqns_from_user(void __user *uapqns, size_t nr_apqns) if (!uapqns || nr_apqns == 0) return NULL; - return memdup_user(uapqns, nr_apqns * sizeof(struct pkey_apqn)); + return memdup_array_user(uapqns, nr_apqns, sizeof(struct pkey_apqn)); } static int pkey_ioctl_genseck(struct pkey_genseck __user *ugs) diff --git a/drivers/scsi/elx/efct/efct_hw.c b/drivers/scsi/elx/efct/efct_hw.c index 5a5525054d71..5b079b8b7a08 100644 --- a/drivers/scsi/elx/efct/efct_hw.c +++ b/drivers/scsi/elx/efct/efct_hw.c @@ -1120,7 +1120,7 @@ int efct_hw_parse_filter(struct efct_hw *hw, void *value) { int rc = 0; - char *p = NULL; + char *p = NULL, *pp = NULL; char *token; u32 idx = 0; @@ -1132,6 +1132,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) efc_log_err(hw->os, "p is NULL\n"); return -ENOMEM; } + pp = p; idx = 0; while ((token = strsep(&p, ",")) && *token) { @@ -1144,7 +1145,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) if (idx == 
ARRAY_SIZE(hw->config.filter_def)) break; } - kfree(p); + kfree(pp); return rc; } diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c index f8ab69c51dab..ae37f85f618b 100644 --- a/drivers/scsi/fnic/fdls_disc.c +++ b/drivers/scsi/fnic/fdls_disc.c @@ -763,50 +763,86 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport) iport->fabric.timer_pending = 1; } -static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport, + uint16_t oxid) { - uint8_t *frame; + struct fc_frame_header *pfdmi_abts; uint8_t d_id[3]; + uint8_t *frame; struct fnic *fnic = iport->fnic; - struct fc_frame_header *pfabric_abts; - unsigned long fdmi_tov; - uint16_t oxid; - uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + - sizeof(struct fc_frame_header); frame = fdls_alloc_frame(iport); if (frame == NULL) { FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num, "Failed to allocate frame to send FDMI ABTS"); - return; + return NULL; } - pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); + pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); fdls_init_fabric_abts_frame(frame, iport); hton24(d_id, FC_FID_MGMT_SERV); - FNIC_STD_SET_D_ID(*pfabric_abts, d_id); + FNIC_STD_SET_D_ID(*pfdmi_abts, d_id); + FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid); + + return frame; +} + +static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +{ + uint8_t *frame; + struct fnic *fnic = iport->fnic; + unsigned long fdmi_tov; + uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET + + sizeof(struct fc_frame_header); if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { - oxid = iport->active_oxid_fdmi_plogi; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_plogi); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI PLOGI abts. 
iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_plogi); fnic_send_fcoe_frame(iport, frame, frame_size); } else { if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { - oxid = iport->active_oxid_fdmi_rhba; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rhba); + if (frame == NULL) + return; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RHBA abts. iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rhba); fnic_send_fcoe_frame(iport, frame, frame_size); } if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { - oxid = iport->active_oxid_fdmi_rpa; - FNIC_STD_SET_OX_ID(*pfabric_abts, oxid); + frame = fdls_alloc_init_fdmi_abts_frame(iport, + iport->active_oxid_fdmi_rpa); + if (frame == NULL) { + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + goto arm_timer; + else + return; + } + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: FDLS send FDMI RPA abts. 
iport->fabric.state: %d oxid: 0x%x", + iport->fcid, iport->fabric.state, iport->active_oxid_fdmi_rpa); fnic_send_fcoe_frame(iport, frame, frame_size); } } +arm_timer: fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov); mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov)); iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING; + + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); } static void fdls_send_fabric_flogi(struct fnic_iport_s *iport) @@ -2245,6 +2281,21 @@ void fdls_fabric_timer_callback(struct timer_list *t) spin_unlock_irqrestore(&fnic->fnic_lock, flags); } +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport) +{ + struct fnic *fnic = iport->fnic; + + iport->fabric.fdmi_pending = 0; + /* If max retries not exhausted, start over from fdmi plogi */ + if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { + iport->fabric.fdmi_retry++; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Retry FDMI PLOGI. FDMI retry: %d", + iport->fabric.fdmi_retry); + fdls_send_fdmi_plogi(iport); + } +} + void fdls_fdmi_timer_callback(struct timer_list *t) { struct fnic_fdls_fabric_s *fabric = timer_container_of(fabric, t, @@ -2257,7 +2308,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) spin_lock_irqsave(&fnic->fnic_lock, flags); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); if (!iport->fabric.fdmi_pending) { /* timer expired after fdmi responses received. 
*/ @@ -2265,7 +2316,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* if not abort pending, send an abort */ if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { @@ -2274,33 +2325,37 @@ void fdls_fdmi_timer_callback(struct timer_list *t) return; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); /* ABTS pending for an active fdmi request that is pending. * That means FDMI ABTS timed out * Schedule to free the OXID after 2*r_a_tov and proceed */ if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI PLOGI ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_plogi); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_plogi); } else { - if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) + if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RHBA ABTS timed out. Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rhba); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rhba); - if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) + } + if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) { + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "FDMI RPA ABTS timed out. 
Schedule oxid free: 0x%x\n", + iport->active_oxid_fdmi_rpa); fdls_schedule_oxid_free(iport, &iport->active_oxid_fdmi_rpa); + } } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); - iport->fabric.fdmi_pending = 0; - /* If max retries not exhaused, start over from fdmi plogi */ - if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) { - iport->fabric.fdmi_retry++; - FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "retry fdmi timer %d", iport->fabric.fdmi_retry); - fdls_send_fdmi_plogi(iport); - } + fdls_fdmi_retry_plogi(iport); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, - "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); + "iport->fabric.fdmi_pending: 0x%x\n", iport->fabric.fdmi_pending); spin_unlock_irqrestore(&fnic->fnic_lock, flags); } @@ -3715,13 +3770,60 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, switch (FNIC_FRAME_TYPE(oxid)) { case FNIC_FRAME_TYPE_FDMI_PLOGI: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI PLOGI ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING; + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RHBA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RHBA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING; + + /* If 
RPA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; case FNIC_FRAME_TYPE_FDMI_RPA: + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "Received FDMI RPA ABTS rsp with oxid: 0x%x", oxid); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); + + iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING; + + /* If RHBA is still pending, don't turn off ABORT PENDING. + * We count on the timer to detect the ABTS timeout and take + * corrective action. + */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)) + iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; + fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa); + FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + "0x%x: iport->fabric.fdmi_pending: 0x%x", + iport->fcid, iport->fabric.fdmi_pending); break; default: FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, @@ -3730,10 +3832,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, break; } - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; - - fdls_send_fdmi_plogi(iport); + /* + * Only if ABORT PENDING is off, delete the timer, and if no other + * operations are pending, retry FDMI. + * Otherwise, let the timer pop and take the appropriate action. 
+ */ + if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) { + timer_delete_sync(&iport->fabric.fdmi_timer); + if (!iport->fabric.fdmi_pending) + fdls_fdmi_retry_plogi(iport); + } } static void @@ -4972,9 +5080,12 @@ void fnic_fdls_link_down(struct fnic_iport_s *iport) fdls_delete_tport(iport, tport); } - if ((fnic_fdmi_support == 1) && (iport->fabric.fdmi_pending > 0)) { - timer_delete_sync(&iport->fabric.fdmi_timer); - iport->fabric.fdmi_pending = 0; + if (fnic_fdmi_support == 1) { + if (iport->fabric.fdmi_pending > 0) { + timer_delete_sync(&iport->fabric.fdmi_timer); + iport->fabric.fdmi_pending = 0; + } + iport->flags &= ~FNIC_FDMI_ACTIVE; } FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 6c5f6046b1f5..c2fdc6553e62 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -30,7 +30,7 @@ #define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.8.0.0" +#define DRV_VERSION "1.8.0.2" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: " diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 1e8cd64f9a5c..103ab6f1f7cd 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -636,6 +636,8 @@ static int fnic_send_frame(struct fnic *fnic, void *frame, int frame_len) unsigned long flags; pa = dma_map_single(&fnic->pdev->dev, frame, frame_len, DMA_TO_DEVICE); + if (dma_mapping_error(&fnic->pdev->dev, pa)) + return -ENOMEM; if ((fnic_fc_trace_set_data(fnic->fnic_num, FNIC_FC_SEND | 0x80, (char *) frame, diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h index 8e610b65ad57..531d0b37e450 100644 --- a/drivers/scsi/fnic/fnic_fdls.h +++ b/drivers/scsi/fnic/fnic_fdls.h @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport, bool fdls_delete_tport(struct fnic_iport_s *iport, struct fnic_tport_s *tport); void fdls_fdmi_timer_callback(struct timer_list *t); 
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport); /* fnic_fcs.c */ void fnic_fdls_init(struct fnic *fnic, int usefip); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 7133b254cbe4..75b29a018d1f 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1046,7 +1046,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, unsigned int cq_ind if (icmnd_cmpl->scsi_status == SAM_STAT_TASK_SET_FULL) atomic64_inc(&fnic_stats->misc_stats.queue_fulls); - FNIC_SCSI_DBG(KERN_INFO, fnic->host, fnic->fnic_num, + FNIC_SCSI_DBG(KERN_DEBUG, fnic->host, fnic->fnic_num, "xfer_len: %llu", xfer_len); break; diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index e021f1106bea..cc5d05dc395c 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -473,10 +473,17 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->max_sectors = SCSI_DEFAULT_MAX_SECTORS; - if (sht->max_segment_size) - shost->max_segment_size = sht->max_segment_size; - else - shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + shost->virt_boundary_mask = sht->virt_boundary_mask; + if (shost->virt_boundary_mask) { + WARN_ON_ONCE(sht->max_segment_size && + sht->max_segment_size != UINT_MAX); + shost->max_segment_size = UINT_MAX; + } else { + if (sht->max_segment_size) + shost->max_segment_size = sht->max_segment_size; + else + shost->max_segment_size = BLK_MAX_SEGMENT_SIZE; + } /* 32-byte (dword) is a common minimum for HBAs. 
*/ if (sht->dma_alignment) @@ -492,9 +499,6 @@ struct Scsi_Host *scsi_host_alloc(const struct scsi_host_template *sht, int priv else shost->dma_boundary = 0xffffffff; - if (sht->virt_boundary_mask) - shost->virt_boundary_mask = sht->virt_boundary_mask; - device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3aac0e17cb00..9179f8aee964 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5910,7 +5910,11 @@ megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance) const struct cpumask *mask; if (instance->perf_mode == MR_BALANCED_PERF_MODE) { - mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); + int nid = dev_to_node(&instance->pdev->dev); + + if (nid == NUMA_NO_NODE) + nid = 0; + mask = cpumask_of_node(nid); for (i = 0; i < instance->low_latency_index_start; i++) { irq = pci_irq_vector(instance->pdev, i); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 0cd6f3e14882..13b6cb1b93ac 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2147,7 +2147,7 @@ qla24xx_get_port_database(scsi_qla_host_t *vha, u16 nport_handle, pdb_dma = dma_map_single(&vha->hw->pdev->dev, pdb, sizeof(*pdb), DMA_FROM_DEVICE); - if (!pdb_dma) { + if (dma_mapping_error(&vha->hw->pdev->dev, pdb_dma)) { ql_log(ql_log_warn, vha, 0x1116, "Failed to map dma buffer.\n"); return QLA_MEMORY_ALLOC_FAILED; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d4141656b204..a39f1da4ce47 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3420,6 +3420,8 @@ static int qla4xxx_alloc_pdu(struct iscsi_task *task, uint8_t opcode) task_data->data_dma = dma_map_single(&ha->pdev->dev, task->data, task->data_count, 
DMA_TO_DEVICE); + if (dma_mapping_error(&ha->pdev->dev, task_data->data_dma)) + return -ENOMEM; } DEBUG2(ql4_printk(KERN_INFO, ha, "%s: MaxRecvLen %u, iscsi hrd %d\n", diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3f6e87705b62..eeaa6af294b8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3384,7 +3384,7 @@ static void sd_read_block_limits_ext(struct scsi_disk *sdkp) rcu_read_lock(); vpd = rcu_dereference(sdkp->device->vpd_pgb7); - if (vpd && vpd->len >= 2) + if (vpd && vpd->len >= 6) sdkp->rscs = vpd->data[5] & 1; rcu_read_unlock(); } diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index fe0f122f07b0..d3c78f59b22c 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1958,12 +1958,7 @@ static int cqspi_probe(struct platform_device *pdev) goto probe_setup_failed; } - ret = devm_pm_runtime_enable(dev); - if (ret) { - if (cqspi->rx_chan) - dma_release_channel(cqspi->rx_chan); - goto probe_setup_failed; - } + pm_runtime_enable(dev); pm_runtime_set_autosuspend_delay(dev, CQSPI_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); @@ -1981,6 +1976,7 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: cqspi_controller_enable(cqspi, 0); + pm_runtime_disable(dev); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); @@ -1999,7 +1995,8 @@ static void cqspi_remove(struct platform_device *pdev) if (cqspi->rx_chan) dma_release_channel(cqspi->rx_chan); - clk_disable_unprepare(cqspi->clk); + if (pm_runtime_get_sync(&pdev->dev) >= 0) + clk_disable(cqspi->clk); if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 863781ba6c16..0dcd49114095 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -983,11 +983,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, if (dspi->devtype_data->trans_mode == 
DSPI_DMA_MODE) { status = dspi_dma_xfer(dspi); } else { + /* + * Reinitialize the completion before transferring data + * to avoid the case where it might remain in the done + * state due to a spurious interrupt from a previous + * transfer. This could falsely signal that the current + * transfer has completed. + */ + if (dspi->irq) + reinit_completion(&dspi->xfer_done); + dspi_fifo_write(dspi); if (dspi->irq) { wait_for_completion(&dspi->xfer_done); - reinit_completion(&dspi->xfer_done); } else { do { status = dspi_poll(dspi); diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 77d9cc65477a..f2e1a27b410d 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -315,6 +315,22 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) mtd_set_ooblayout(mtd, &qcom_spi_ooblayout); + /* + * Free the temporary BAM transaction allocated initially by + * qcom_nandc_alloc(), and allocate a new one based on the + * updated max_cwperpage value. 
+ */ + qcom_free_bam_transaction(snandc); + + snandc->max_cwperpage = cwperpage; + + snandc->bam_txn = qcom_alloc_bam_transaction(snandc); + if (!snandc->bam_txn) { + dev_err(snandc->dev, "failed to allocate BAM transaction\n"); + ret = -ENOMEM; + goto err_free_ecc_cfg; + } + ecc_cfg->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, (cwperpage - 1)) | FIELD_PREP(UD_SIZE_BYTES_MASK, ecc_cfg->cw_data) | FIELD_PREP(DISABLE_STATUS_AFTER_WRITE, 1) | diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 3581757a269b..3be7499db21e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -407,9 +407,6 @@ tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_tra static void tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); - /* * In packed mode, each word in FIFO may contain multiple packets * based on bits per word. So all bytes in each FIFO word are valid. 
@@ -442,17 +439,11 @@ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_tx_pos += write_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); } static void tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); - if (tqspi->is_packed) { tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word; } else { @@ -478,9 +469,6 @@ tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_rx_pos += read_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); } static void tegra_qspi_dma_complete(void *args) @@ -701,8 +689,6 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct return ret; } - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); ret = tegra_qspi_start_rx_dma(tqspi, t, len); if (ret < 0) { dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret); diff --git a/drivers/staging/rtl8723bs/core/rtw_security.c b/drivers/staging/rtl8723bs/core/rtw_security.c index 1e9eff01b1aa..e9f382c280d9 100644 --- a/drivers/staging/rtl8723bs/core/rtw_security.c +++ b/drivers/staging/rtl8723bs/core/rtw_security.c @@ -868,29 +868,21 @@ static signed int aes_cipher(u8 *key, uint hdrlen, num_blocks, payload_index; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); uint frsubtype = GetFrameSubType(pframe); 
frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - if ((hdrlen == WLAN_HDR_A3_LEN) || (hdrlen == WLAN_HDR_A3_QOS_LEN)) a4_exists = 0; else @@ -1080,15 +1072,15 @@ static signed int aes_decipher(u8 *key, uint hdrlen, num_blocks, payload_index; signed int res = _SUCCESS; u8 pn_vector[6]; - u8 mic_iv[16]; - u8 mic_header1[16]; - u8 mic_header2[16]; - u8 ctr_preload[16]; + u8 mic_iv[16] = {}; + u8 mic_header1[16] = {}; + u8 mic_header2[16] = {}; + u8 ctr_preload[16] = {}; /* Intermediate Buffers */ - u8 chain_buffer[16]; - u8 aes_out[16]; - u8 padded_buffer[16]; + u8 chain_buffer[16] = {}; + u8 aes_out[16] = {}; + u8 padded_buffer[16] = {}; u8 mic[8]; uint frtype = GetFrameType(pframe); @@ -1096,14 +1088,6 @@ static signed int aes_decipher(u8 *key, uint hdrlen, frsubtype = frsubtype>>4; - memset((void *)mic_iv, 0, 16); - memset((void *)mic_header1, 0, 16); - memset((void *)mic_header2, 0, 16); - memset((void *)ctr_preload, 0, 16); - memset((void *)chain_buffer, 0, 16); - memset((void *)aes_out, 0, 16); - memset((void *)padded_buffer, 0, 16); - /* start to decrypt the payload */ num_blocks = (plen-8) / 16; /* plen including LLC, payload_length and mic) */ diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 34cf2c399b39..70905805cb17 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1842,7 +1842,9 @@ out: } kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); - core_scsi3_lunacl_undepend_item(dest_se_deve); + + if (dest_se_deve) + core_scsi3_lunacl_undepend_item(dest_se_deve); if (is_local) continue; diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index f3af5666bb11..f9ef7d94cebd 100644 --- a/drivers/tee/optee/ffa_abi.c +++ 
b/drivers/tee/optee/ffa_abi.c @@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev, return true; } +static void notif_work_fn(struct work_struct *work) +{ + struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa, + notif_work); + struct optee *optee = container_of(optee_ffa, struct optee, ffa); + + optee_do_bottom_half(optee->ctx); +} + static void notif_callback(int notify_id, void *cb_data) { struct optee *optee = cb_data; if (notify_id == optee->ffa.bottom_half_value) - optee_do_bottom_half(optee->ctx); + queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work); else optee_notif_send(optee, notify_id); } @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev) struct optee *optee = ffa_dev_get_drvdata(ffa_dev); u32 bottom_half_id = optee->ffa.bottom_half_value; - if (bottom_half_id != U32_MAX) + if (bottom_half_id != U32_MAX) { ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, bottom_half_id); + destroy_workqueue(optee->ffa.notif_wq); + } optee_remove_common(optee); mutex_destroy(&optee->ffa.mutex); @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, u32 notif_id = 0; int rc; + INIT_WORK(&optee->ffa.notif_work, notif_work_fn); + optee->ffa.notif_wq = create_workqueue("optee_notification"); + if (!optee->ffa.notif_wq) { + rc = -EINVAL; + goto err; + } + while (true) { rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev, is_per_vcpu, @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev, * notifications in that case. 
*/ if (rc != -EACCES) - return rc; + goto err_wq; notif_id++; if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE) - return rc; + goto err_wq; } optee->ffa.bottom_half_value = notif_id; rc = enable_async_notif(optee); - if (rc < 0) { - ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, - notif_id); - optee->ffa.bottom_half_value = U32_MAX; - } + if (rc < 0) + goto err_rel; + + return 0; +err_rel: + ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id); +err_wq: + destroy_workqueue(optee->ffa.notif_wq); +err: + optee->ffa.bottom_half_value = U32_MAX; return rc; } diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index dc0f355ef72a..9526087f0e68 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -165,6 +165,8 @@ struct optee_ffa { /* Serializes access to @global_ids */ struct mutex mutex; struct rhashtable global_ids; + struct workqueue_struct *notif_wq; + struct work_struct notif_work; }; struct optee; diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index bd02ee898f5d..500dfc009d03 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -235,6 +235,7 @@ struct imx_port { enum imx_tx_state tx_state; struct hrtimer trigger_start_tx; struct hrtimer trigger_stop_tx; + unsigned int rxtl; }; struct imx_port_ucrs { @@ -1339,6 +1340,7 @@ static void imx_uart_clear_rx_errors(struct imx_port *sport) #define TXTL_DEFAULT 8 #define RXTL_DEFAULT 8 /* 8 characters or aging timer */ +#define RXTL_CONSOLE_DEFAULT 1 #define TXTL_DMA 8 /* DMA burst setting */ #define RXTL_DMA 9 /* DMA burst setting */ @@ -1457,7 +1459,7 @@ static void imx_uart_disable_dma(struct imx_port *sport) ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN); imx_uart_writel(sport, ucr1, UCR1); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); sport->dma_is_enabled = 0; } @@ -1482,7 +1484,12 @@ static int imx_uart_startup(struct 
uart_port *port) return retval; } - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + if (uart_console(&sport->port)) + sport->rxtl = RXTL_CONSOLE_DEFAULT; + else + sport->rxtl = RXTL_DEFAULT; + + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); /* disable the DREN bit (Data Ready interrupt enable) before * requesting IRQs @@ -1948,7 +1955,7 @@ static int imx_uart_poll_init(struct uart_port *port) if (retval) clk_disable_unprepare(sport->clk_ipg); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); uart_port_lock_irqsave(&sport->port, &flags); @@ -2040,7 +2047,7 @@ static int imx_uart_rs485_config(struct uart_port *port, struct ktermios *termio /* If the receiver trigger is 0, set it to a default value */ ufcr = imx_uart_readl(sport, UFCR); if ((ufcr & UFCR_RXTL_MASK) == 0) - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); imx_uart_start_rx(port); } @@ -2302,7 +2309,7 @@ imx_uart_console_setup(struct console *co, char *options) else imx_uart_console_get_options(sport, &baud, &parity, &bits); - imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT); + imx_uart_setup_ufcr(sport, TXTL_DEFAULT, sport->rxtl); retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c index 5d1677f1b651..cb3b127b06b6 100644 --- a/drivers/tty/serial/serial_base_bus.c +++ b/drivers/tty/serial/serial_base_bus.c @@ -72,6 +72,7 @@ static int serial_base_device_init(struct uart_port *port, dev->parent = parent_dev; dev->bus = &serial_base_bus_type; dev->release = release; + device_set_of_node_from_dev(dev, parent_dev); if (!serial_base_initialized) { dev_dbg(port->dev, "uart_add_one_port() called before arch_initcall()?\n"); diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 6ead622b7713..03877485dfb7 100644 --- a/drivers/tty/vt/ucs.c +++ 
b/drivers/tty/vt/ucs.c @@ -206,7 +206,7 @@ static int ucs_page_entry_cmp(const void *key, const void *element) /** * ucs_get_fallback() - Get a substitution for the provided Unicode character - * @base: Base Unicode code point (UCS-4) + * @cp: Unicode code point (UCS-4) * * Get a simpler fallback character for the provided Unicode character. * This is used for terminal display when corresponding glyph is unavailable. diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ed39d9cb4432..62049ceb34de 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4650,6 +4650,7 @@ void do_unblank_screen(int leaving_gfx) set_palette(vc); set_cursor(vc); vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num); + notify_update(vc); } EXPORT_SYMBOL(do_unblank_screen); diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index de8b6acd4058..fcb4b14a710f 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1808,7 +1808,7 @@ UFS_UNIT_DESC_PARAM(logical_block_size, _LOGICAL_BLK_SIZE, 1); UFS_UNIT_DESC_PARAM(logical_block_count, _LOGICAL_BLK_COUNT, 8); UFS_UNIT_DESC_PARAM(erase_block_size, _ERASE_BLK_SIZE, 4); UFS_UNIT_DESC_PARAM(provisioning_type, _PROVISIONING_TYPE, 1); -UFS_UNIT_DESC_PARAM(physical_memory_resourse_count, _PHY_MEM_RSRC_CNT, 8); +UFS_UNIT_DESC_PARAM(physical_memory_resource_count, _PHY_MEM_RSRC_CNT, 8); UFS_UNIT_DESC_PARAM(context_capabilities, _CTX_CAPABILITIES, 2); UFS_UNIT_DESC_PARAM(large_unit_granularity, _LARGE_UNIT_SIZE_M1, 1); UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); @@ -1825,7 +1825,7 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = { &dev_attr_logical_block_count.attr, &dev_attr_erase_block_size.attr, &dev_attr_provisioning_type.attr, - &dev_attr_physical_memory_resourse_count.attr, + &dev_attr_physical_memory_resource_count.attr, &dev_attr_context_capabilities.attr, &dev_attr_large_unit_granularity.attr, &dev_attr_wb_buf_alloc_units.attr, diff --git 
a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f62d89c8e580..50adfb8b335b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -7807,7 +7807,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) hba->silence_err_logs = false; /* scale up clocks to max frequency before full reinitialization */ - ufshcd_scale_clks(hba, ULONG_MAX, true); + if (ufshcd_is_clkscaling_supported(hba)) + ufshcd_scale_clks(hba, ULONG_MAX, true); err = ufshcd_hba_enable(hba); diff --git a/drivers/usb/cdns3/cdnsp-debug.h b/drivers/usb/cdns3/cdnsp-debug.h index cd138acdcce1..86860686d836 100644 --- a/drivers/usb/cdns3/cdnsp-debug.h +++ b/drivers/usb/cdns3/cdnsp-debug.h @@ -327,12 +327,13 @@ static inline const char *cdnsp_decode_trb(char *str, size_t size, u32 field0, case TRB_RESET_EP: case TRB_HALT_ENDPOINT: ret = scnprintf(str, size, - "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c", + "%s: ep%d%s(%d) ctx %08x%08x slot %ld flags %c %c", cdnsp_trb_type_string(type), ep_num, ep_id % 2 ? "out" : "in", TRB_TO_EP_INDEX(field3), field1, field0, TRB_TO_SLOT_ID(field3), - field3 & TRB_CYCLE ? 'C' : 'c'); + field3 & TRB_CYCLE ? 'C' : 'c', + field3 & TRB_ESP ? 'P' : 'p'); break; case TRB_STOP_RING: ret = scnprintf(str, size, diff --git a/drivers/usb/cdns3/cdnsp-ep0.c b/drivers/usb/cdns3/cdnsp-ep0.c index f317d3c84781..5cd9b898ce97 100644 --- a/drivers/usb/cdns3/cdnsp-ep0.c +++ b/drivers/usb/cdns3/cdnsp-ep0.c @@ -414,6 +414,7 @@ static int cdnsp_ep0_std_request(struct cdnsp_device *pdev, void cdnsp_setup_analyze(struct cdnsp_device *pdev) { struct usb_ctrlrequest *ctrl = &pdev->setup; + struct cdnsp_ep *pep; int ret = -EINVAL; u16 len; @@ -427,10 +428,21 @@ void cdnsp_setup_analyze(struct cdnsp_device *pdev) goto out; } + pep = &pdev->eps[0]; + /* Restore the ep0 to Stopped/Running state. 
*/ - if (pdev->eps[0].ep_state & EP_HALTED) { - trace_cdnsp_ep0_halted("Restore to normal state"); - cdnsp_halt_endpoint(pdev, &pdev->eps[0], 0); + if (pep->ep_state & EP_HALTED) { + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_HALTED) + cdnsp_halt_endpoint(pdev, pep, 0); + + /* + * Halt Endpoint Command for SSP2 for ep0 preserve current + * endpoint state and driver has to synchronize the + * software endpoint state with endpoint output context + * state. + */ + pep->ep_state &= ~EP_HALTED; + pep->ep_state |= EP_STOPPED; } /* diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 2afa3e558f85..a91cca509db0 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -987,6 +987,12 @@ enum cdnsp_setup_dev { #define STREAM_ID_FOR_TRB(p) ((((p)) << 16) & GENMASK(31, 16)) #define SCT_FOR_TRB(p) (((p) << 1) & 0x7) +/* + * Halt Endpoint Command TRB field. + * The ESP bit only exists in the SSP2 controller. + */ +#define TRB_ESP BIT(9) + /* Link TRB specific fields. */ #define TRB_TC BIT(1) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index fd06cb85c4ea..0758f171f73e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -772,7 +772,9 @@ static int cdnsp_update_port_id(struct cdnsp_device *pdev, u32 port_id) } if (port_id != old_port) { - cdnsp_disable_slot(pdev); + if (pdev->slot_id) + cdnsp_disable_slot(pdev); + pdev->active_port = port; cdnsp_enable_slot(pdev); } @@ -2483,7 +2485,8 @@ void cdnsp_queue_halt_endpoint(struct cdnsp_device *pdev, unsigned int ep_index) { cdnsp_queue_command(pdev, 0, 0, 0, TRB_TYPE(TRB_HALT_ENDPOINT) | SLOT_ID_FOR_TRB(pdev->slot_id) | - EP_ID_FOR_TRB(ep_index)); + EP_ID_FOR_TRB(ep_index) | + (!ep_index ? 
TRB_ESP : 0)); } void cdnsp_force_header_wakeup(struct cdnsp_device *pdev, int intf_num) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8a9b31fd5c89..1a48e6440e6c 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -2374,6 +2374,10 @@ static void udc_suspend(struct ci_hdrc *ci) */ if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0) hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0); + + if (ci->gadget.connected && + (!ci->suspended || !device_may_wakeup(ci->dev))) + usb_gadget_disconnect(&ci->gadget); } static void udc_resume(struct ci_hdrc *ci, bool power_lost) @@ -2384,6 +2388,9 @@ static void udc_resume(struct ci_hdrc *ci, bool power_lost) OTGSC_BSVIS | OTGSC_BSVIE); if (ci->vbus_active) usb_gadget_vbus_disconnect(&ci->gadget); + } else if (ci->vbus_active && ci->driver && + !ci->gadget.connected) { + usb_gadget_connect(&ci->gadget); } /* Restore value 0 if it was set for power lost check */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 770d1e91183c..3e1215f7a9a0 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -68,6 +68,12 @@ */ #define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ +/* + * Give SS hubs 200ms time after wake to train downstream links before + * assuming no port activity and allowing hub to runtime suspend back. + */ +#define USB_SS_PORT_U0_WAKE_TIME 200 /* ms */ + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ @@ -1095,6 +1101,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth @@ -1343,6 +1350,17 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } + if (type == HUB_RESUME && hub_is_superspeed(hub->hdev)) { + /* give usb3 downstream links training time after hub resume */ + usb_autopm_get_interface_no_resume( + to_usb_interface(hub->intfdev)); + + queue_delayed_work(system_power_efficient_wq, + &hub->post_resume_work, + msecs_to_jiffies(USB_SS_PORT_U0_WAKE_TIME)); + return; + } + hub_put(hub); } @@ -1361,6 +1379,14 @@ static void hub_init_func3(struct work_struct *ws) hub_activate(hub, HUB_INIT3); } +static void hub_post_resume(struct work_struct *ws) +{ + struct usb_hub *hub = container_of(ws, struct usb_hub, post_resume_work.work); + + usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); + hub_put(hub); +} + enum hub_quiescing_type { HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND }; @@ -1386,6 +1412,7 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) /* Stop hub_wq and related activity */ timer_delete_sync(&hub->irq_urb_retry); + flush_delayed_work(&hub->post_resume_work); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); @@ -1944,6 +1971,7 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) hub->hdev = hdev; INIT_DELAYED_WORK(&hub->leds, led_work); INIT_DELAYED_WORK(&hub->init_work, NULL); + INIT_DELAYED_WORK(&hub->post_resume_work, hub_post_resume); INIT_WORK(&hub->events, hub_event); INIT_LIST_HEAD(&hub->onboard_devs); spin_lock_init(&hub->irq_urb_lock); @@ -2337,6 +2365,9 @@ void usb_disconnect(struct usb_device **pdev) usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); + if (udev->usb4_link) + device_link_del(udev->usb4_link); + /* Unregister the device. 
The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index e6ae73f8a95d..9ebc5ef54a32 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -70,6 +70,7 @@ struct usb_hub { u8 indicator[USB_MAXCHILDREN]; struct delayed_work leds; struct delayed_work init_work; + struct delayed_work post_resume_work; struct work_struct events; spinlock_t irq_urb_lock; struct timer_list irq_urb_retry; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 53d68d20fb62..0cf94c7a2c9c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -227,7 +227,8 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech HD Webcam C270 */ - { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME | + USB_QUIRK_NO_LPM}, /* Logitech HD Pro Webcams C920, C920-C, C922, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index ea1ce8beb0cb..489dbdc96f94 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(usb_acpi_set_power_state); */ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) { - const struct device_link *link; + struct device_link *link; struct usb_port *port_dev; struct usb_hub *hub; @@ -188,6 +188,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) dev_dbg(&port_dev->dev, "Created device link from %s to %s\n", dev_name(&port_dev->child->dev), dev_name(nhi_fwnode->dev)); + udev->usb4_link = link; + return 0; } diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 2bc775a747f2..8002c23a5a02 100644 --- 
a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -2422,6 +2422,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) { u32 reg; int i; + int ret; if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) { dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) & @@ -2440,7 +2441,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) case DWC3_GCTL_PRTCAP_DEVICE: if (pm_runtime_suspended(dwc->dev)) break; - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); dwc3_core_exit(dwc); break; @@ -2475,7 +2478,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg) break; if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) { - dwc3_gadget_suspend(dwc); + ret = dwc3_gadget_suspend(dwc); + if (ret) + return ret; synchronize_irq(dwc->irq_gadget); } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 321361288935..74968f93d4a3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3516,7 +3516,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, * We're going to do that here to avoid problems of HW trying * to use bogus TRBs for transfers. */ - if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO)) + if (trb->ctrl & DWC3_TRB_CTRL_HWO) trb->ctrl &= ~DWC3_TRB_CTRL_HWO; /* @@ -4821,26 +4821,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) int ret; ret = dwc3_gadget_soft_disconnect(dwc); - if (ret) - goto err; - - spin_lock_irqsave(&dwc->lock, flags); - if (dwc->gadget_driver) - dwc3_disconnect_gadget(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - - return 0; - -err: /* * Attempt to reset the controller's state. Likely no * communication can be established until the host * performs a port reset. 
*/ - if (dwc->softconnect) + if (ret && dwc->softconnect) { dwc3_gadget_soft_connect(dwc); + return -EAGAIN; + } - return ret; + spin_lock_irqsave(&dwc->lock, flags); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + + return 0; } int dwc3_gadget_resume(struct dwc3 *dwc) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index ab544f6824be..540dc5ab96fc 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -295,8 +295,8 @@ __acquires(&port->port_lock) break; } - if (do_tty_wake && port->port.tty) - tty_wakeup(port->port.tty); + if (do_tty_wake) + tty_port_tty_wakeup(&port->port); return status; } @@ -544,20 +544,16 @@ static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head, static int gs_start_io(struct gs_port *port) { struct list_head *head = &port->read_pool; - struct usb_ep *ep; + struct usb_ep *ep = port->port_usb->out; int status; unsigned started; - if (!port->port_usb || !port->port.tty) - return -EIO; - /* Allocate RX and TX I/O buffers. We can't easily do this much * earlier (with GFP_KERNEL) because the requests are coupled to * endpoints, as are the packet sizes we'll be using. Different * configurations may use different endpoints with a given port; * and high speed vs full speed changes packet sizes too. 
*/ - ep = port->port_usb->out; status = gs_alloc_requests(ep, head, gs_read_complete, &port->read_allocated); if (status) @@ -578,7 +574,7 @@ static int gs_start_io(struct gs_port *port) gs_start_tx(port); /* Unblock any pending writes into our circular buffer, in case * we didn't in gs_start_tx() */ - tty_wakeup(port->port.tty); + tty_port_tty_wakeup(&port->port); } else { /* Free reqs only if we are still connected */ if (port->port_usb) { diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 0d4ce5734165..06a2edb9e86e 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -652,6 +652,10 @@ static void xhci_dbc_stop(struct xhci_dbc *dbc) case DS_DISABLED: return; case DS_CONFIGURED: + spin_lock(&dbc->lock); + xhci_dbc_flush_requests(dbc); + spin_unlock(&dbc->lock); + if (dbc->driver->disconnect) dbc->driver->disconnect(dbc); break; diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index 60ed753c85bb..d894081d8d15 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -617,6 +617,7 @@ int dbc_tty_init(void) dbc_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; dbc_tty_driver->subtype = SERIAL_TYPE_NORMAL; dbc_tty_driver->init_termios = tty_std_termios; + dbc_tty_driver->init_termios.c_lflag &= ~ECHO; dbc_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; dbc_tty_driver->init_termios.c_ispeed = 9600; diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index bd745a0f2f78..6680afa4f596 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1449,6 +1449,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Periodic endpoint bInterval limit quirk */ if (usb_endpoint_xfer_int(&ep->desc) || usb_endpoint_xfer_isoc(&ep->desc)) { + if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_9) && + interval >= 9) { + interval = 8; + } if ((xhci->quirks & XHCI_LIMIT_ENDPOINT_INTERVAL_7) && udev->speed >= 
USB_SPEED_HIGH && interval >= 7) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0c481cbc8f08..00fac8b233d2 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,12 +71,22 @@ #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0 +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI 0x13ed +#define PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI 0x13ee +#define PCI_DEVICE_ID_AMD_STARSHIP_XHCI 0x148c +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI 0x15d4 +#define PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI 0x15d5 +#define PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI 0x15e0 +#define PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI 0x15e1 +#define PCI_DEVICE_ID_AMD_RAVEN2_XHCI 0x15e5 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI 0x7316 + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 @@ -280,6 +290,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEC_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_ARIEL_TYPEA_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_STARSHIP_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D4_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_FIREFLIGHT_15D5_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E0_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN_15E1_XHCI || + pdev->device == PCI_DEVICE_ID_AMD_RAVEN2_XHCI)) + xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_9; + + if (pdev->vendor == PCI_VENDOR_ID_ATI && + pdev->device == PCI_DEVICE_ID_ATI_NAVI10_7316_XHCI) + xhci->quirks |= 
XHCI_LIMIT_ENDPOINT_INTERVAL_9; + if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96) xhci->quirks |= XHCI_AMD_0x96_HOST; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 6dab142e7278..c79d5ed48a08 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -328,7 +328,8 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s } usb3_hcd = xhci_get_usb3_hcd(xhci); - if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4) + if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 && + !(xhci->quirks & XHCI_BROKEN_STREAMS)) usb3_hcd->can_do_streams = 1; if (xhci->shared_hcd) { diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e038ad3375dc..94c9c9271658 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -518,9 +518,8 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) * In the future we should distinguish between -ENODEV and -ETIMEDOUT * and try to recover a -ETIMEDOUT with a host controller reset. */ - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->cmd_ring, - CMD_RING_RUNNING, 0, 5 * 1000 * 1000, - XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->cmd_ring, + CMD_RING_RUNNING, 0, 5 * 1000 * 1000); if (ret < 0) { xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret); xhci_halt(xhci); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 4e6dbd2375c3..8a819e853288 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -85,29 +85,6 @@ int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us) } /* - * xhci_handshake_check_state - same as xhci_handshake but takes an additional - * exit_state parameter, and bails out with an error immediately when xhc_state - * has exit_state flag set. 
- */ -int xhci_handshake_check_state(struct xhci_hcd *xhci, void __iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state) -{ - u32 result; - int ret; - - ret = readl_poll_timeout_atomic(ptr, result, - (result & mask) == done || - result == U32_MAX || - xhci->xhc_state & exit_state, - 1, usec); - - if (result == U32_MAX || xhci->xhc_state & exit_state) - return -ENODEV; - - return ret; -} - -/* * Disable interrupts and begin the xHCI halting process. */ void xhci_quiesce(struct xhci_hcd *xhci) @@ -227,8 +204,7 @@ int xhci_reset(struct xhci_hcd *xhci, u64 timeout_us) if (xhci->quirks & XHCI_INTEL_HOST) udelay(1000); - ret = xhci_handshake_check_state(xhci, &xhci->op_regs->command, - CMD_RESET, 0, timeout_us, XHCI_STATE_REMOVING); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, timeout_us); if (ret) return ret; @@ -1182,7 +1158,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool power_lost, bool is_auto_resume) xhci_dbg(xhci, "Stop HCD\n"); xhci_halt(xhci); xhci_zero_64b_regs(xhci); - retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); + if (xhci->xhc_state & XHCI_STATE_REMOVING) + retval = -ENODEV; + else + retval = xhci_reset(xhci, XHCI_RESET_LONG_USEC); spin_unlock_irq(&xhci->lock); if (retval) return retval; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 49887a303e43..a20f4e7cd43a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1643,6 +1643,7 @@ struct xhci_hcd { #define XHCI_WRITE_64_HI_LO BIT_ULL(47) #define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48) #define XHCI_ETRON_HOST BIT_ULL(49) +#define XHCI_LIMIT_ENDPOINT_INTERVAL_9 BIT_ULL(50) unsigned int num_active_eps; unsigned int limit_active_eps; @@ -1868,8 +1869,6 @@ void xhci_skip_sec_intr_events(struct xhci_hcd *xhci, /* xHCI host controller glue */ typedef void (*xhci_get_quirks_t)(struct device *, struct xhci_hcd *); int xhci_handshake(void __iomem *ptr, u32 mask, u32 done, u64 timeout_us); -int xhci_handshake_check_state(struct xhci_hcd *xhci, void 
__iomem *ptr, - u32 mask, u32 done, int usec, unsigned int exit_state); void xhci_quiesce(struct xhci_hcd *xhci); int xhci_halt(struct xhci_hcd *xhci); int xhci_start(struct xhci_hcd *xhci); diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index b09b58d7311d..d8b906ec4d1c 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -394,8 +394,7 @@ static int dp_altmode_vdm(struct typec_altmode *alt, case CMDT_RSP_NAK: switch (cmd) { case DP_CMD_STATUS_UPDATE: - if (typec_altmode_exit(alt)) - dev_err(&dp->alt->dev, "Exit Mode Failed!\n"); + dp->state = DP_STATE_EXIT; break; case DP_CMD_CONFIGURE: dp->data.conf = 0; @@ -677,7 +676,7 @@ static ssize_t pin_assignment_show(struct device *dev, assignments = get_current_pin_assignments(dp); - for (i = 0; assignments; assignments >>= 1, i++) { + for (i = 0; assignments && i < DP_PIN_ASSIGN_MAX; assignments >>= 1, i++) { if (assignments & 1) { if (i == cur) len += sprintf(buf + len, "[%s] ", diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 1a1f9e1f8e4e..1f6fdfaa34bf 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4410,17 +4410,6 @@ static int tcpm_src_attach(struct tcpm_port *port) tcpm_enable_auto_vbus_discharge(port, true); - ret = tcpm_set_roles(port, true, TYPEC_STATE_USB, - TYPEC_SOURCE, tcpm_data_role_for_source(port)); - if (ret < 0) - return ret; - - if (port->pd_supported) { - ret = port->tcpc->set_pd_rx(port->tcpc, true); - if (ret < 0) - goto out_disable_mux; - } - /* * USB Type-C specification, version 1.2, * chapter 4.5.2.2.8.1 (Attached.SRC Requirements) @@ -4430,13 +4419,24 @@ static int tcpm_src_attach(struct tcpm_port *port) (polarity == TYPEC_POLARITY_CC2 && port->cc1 == TYPEC_CC_RA)) { ret = tcpm_set_vconn(port, true); if (ret < 0) - goto out_disable_pd; + return ret; } ret = tcpm_set_vbus(port, true); if (ret < 0) goto out_disable_vconn; + ret = 
tcpm_set_roles(port, true, TYPEC_STATE_USB, TYPEC_SOURCE, + tcpm_data_role_for_source(port)); + if (ret < 0) + goto out_disable_vbus; + + if (port->pd_supported) { + ret = port->tcpc->set_pd_rx(port->tcpc, true); + if (ret < 0) + goto out_disable_mux; + } + port->pd_capable = false; port->partner = NULL; @@ -4447,14 +4447,14 @@ static int tcpm_src_attach(struct tcpm_port *port) return 0; -out_disable_vconn: - tcpm_set_vconn(port, false); -out_disable_pd: - if (port->pd_supported) - port->tcpc->set_pd_rx(port->tcpc, false); out_disable_mux: tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE, TYPEC_ORIENTATION_NONE); +out_disable_vbus: + tcpm_set_vbus(port, false); +out_disable_vconn: + tcpm_set_vconn(port, false); + return ret; } diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b784aab66867..4397392bfef0 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2797,7 +2797,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle_done)(struct virtqueue *vq)) { struct vring_virtqueue *vq = to_vvq(_vq); - int err; + int err, err_reset; if (num > vq->vq.num_max) return -E2BIG; @@ -2819,7 +2819,11 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num, else err = virtqueue_resize_split(_vq, num); - return virtqueue_enable_after_reset(_vq); + err_reset = virtqueue_enable_after_reset(_vq); + if (err_reset) + return err_reset; + + return err; } EXPORT_SYMBOL_GPL(virtqueue_resize); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e51e7d88980a..1d847a939f29 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = { .kill_sb = kill_anon_super, }; -static struct inode *anon_inode_make_secure_inode( - const char *name, - const struct inode *context_inode) +/** + * anon_inode_make_secure_inode - allocate an anonymous inode with security context + * @sb: [in] Superblock to allocate from + * @name: [in] Name of the class of the newfile 
(e.g., "secretmem") + * @context_inode: + * [in] Optional parent inode for security inheritance + * + * The function ensures proper security initialization through the LSM hook + * security_inode_init_security_anon(). + * + * Return: Pointer to new inode on success, ERR_PTR on failure. + */ +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode) { struct inode *inode; int error; - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + inode = alloc_anon_inode(sb); if (IS_ERR(inode)) return inode; inode->i_flags &= ~S_PRIVATE; @@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode( } return inode; } +EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, @@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name, return ERR_PTR(-ENOENT); if (make_inode) { - inode = anon_inode_make_secure_inode(name, context_inode); + inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb, + name, context_inode); if (IS_ERR(inode)) { file = ERR_CAST(inode); goto err; diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index d03adc36100e..307824d6eccb 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -279,7 +279,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) if (rcu) return ERR_PTR(-ECHILD); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); @@ -304,7 +304,6 @@ err: set_cached_acl(&inode->v, type, acl); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return acl; } @@ -350,8 +349,8 @@ int bch2_set_acl(struct mnt_idmap *idmap, umode_t mode; int ret; - mutex_lock(&inode->ei_update_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(mutex)(&inode->ei_update_lock); + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); acl = _acl; @@ -385,17 +384,13 
@@ btree_err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) - goto err; + return ret; bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME|ATTR_MODE); set_cached_acl(&inode->v, type, acl); -err: - bch2_trans_put(trans); - mutex_unlock(&inode->ei_update_lock); - - return ret; + return 0; } int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum, diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d64839c756bc..4c1604fd80f9 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -565,11 +565,11 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke int bch2_bucket_gens_init(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); struct bkey_i_bucket_gens g; bool have_bucket_gens_key = false; int ret; + CLASS(btree_trans, trans)(c); ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, ({ /* @@ -609,17 +609,14 @@ iter_err: BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); - bch2_trans_put(trans); - - bch_err_fn(c, ret); return ret; } int bch2_alloc_read(struct bch_fs *c) { - down_read(&c->state_lock); + guard(rwsem_read)(&c->state_lock); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bch_dev *ca = NULL; int ret; @@ -680,10 +677,6 @@ int bch2_alloc_read(struct bch_fs *c) } bch2_dev_put(ca); - bch2_trans_put(trans); - - up_read(&c->state_lock); - bch_err_fn(c, ret); return ret; } @@ -699,7 +692,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, ? BCH_FSCK_ERR_need_discard_key_wrong : BCH_FSCK_ERR_freespace_key_wrong; enum btree_id btree = discard ? 
BTREE_ID_need_discard : BTREE_ID_freespace; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, alloc_k); @@ -711,8 +704,6 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans, if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) || bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed)) ret = 0; - - printbuf_exit(&buf); return ret; } @@ -860,10 +851,10 @@ int bch2_trigger_alloc(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; - struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); + CLASS(bch2_dev_bucket_tryget, ca)(c, new.k->p); if (!ca) return bch_err_throw(c, trigger_alloc); @@ -879,7 +870,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); ret = PTR_ERR_OR_ZERO(new_ka); if (unlikely(ret)) - goto err; + return ret; new_a = &new_ka->v; } @@ -913,7 +904,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?: bch2_bucket_do_index(trans, ca, new.s_c, new_a, true); if (ret) - goto err; + return ret; } if (new_a->data_type == BCH_DATA_cached && @@ -925,7 +916,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, alloc_lru_idx_read(*old_a), alloc_lru_idx_read(*new_a)); if (ret) - goto err; + return ret; ret = bch2_lru_change(trans, BCH_LRU_BUCKET_FRAGMENTATION, @@ -933,17 +924,17 @@ int bch2_trigger_alloc(struct btree_trans *trans, alloc_lru_idx_fragmentation(*old_a, ca), alloc_lru_idx_fragmentation(*new_a, ca)); if (ret) - goto err; + return ret; if (old_a->gen != new_a->gen) { ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); if (ret) - goto err; + return ret; } ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags); if (ret) - goto err; + return ret; } if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) { @@ 
-994,7 +985,7 @@ int bch2_trigger_alloc(struct btree_trans *trans, if (bch2_fs_fatal_err_on(ret, c, "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) - goto err; + return ret; } } @@ -1036,16 +1027,12 @@ int bch2_trigger_alloc(struct btree_trans *trans, g->gen_valid = 1; g->gen = new_a->gen; } -err: fsck_err: - printbuf_exit(&buf); - bch2_dev_put(ca); return ret; invalid_bucket: bch2_fs_inconsistent(c, "reference to invalid bucket\n%s", (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)); - ret = bch_err_throw(c, trigger_alloc); - goto err; + return bch_err_throw(c, trigger_alloc); } /* @@ -1164,10 +1151,10 @@ int bch2_check_alloc_key(struct btree_trans *trans, const struct bch_alloc_v4 *a; unsigned gens_offset; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p); + CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p); if (fsck_err_on(!ca, trans, alloc_key_to_missing_dev_bucket, "alloc key for invalid device:bucket %llu:%llu", @@ -1177,7 +1164,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, return ret; if (!ca->mi.freespace_initialized) - goto out; + return 0; a = bch2_alloc_to_v4(alloc_k, &a_convert); @@ -1185,35 +1172,35 @@ int bch2_check_alloc_key(struct btree_trans *trans, k = bch2_btree_iter_peek_slot(trans, discard_iter); ret = bkey_err(k); if (ret) - goto err; + return ret; bool is_discarded = a->data_type == BCH_DATA_need_discard; if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, trans, alloc_k, !is_discarded, true, true)) { ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); if (ret) - goto err; + return ret; } bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); k = bch2_btree_iter_peek_slot(trans, freespace_iter); ret = bkey_err(k); if (ret) - goto err; + return ret; bool is_free = a->data_type == BCH_DATA_free; if 
(need_discard_or_freespace_err_on(!!k.k->type != is_free, trans, alloc_k, !is_free, false, true)) { ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); if (ret) - goto err; + return ret; } bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); ret = bkey_err(k); if (ret) - goto err; + return ret; if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), trans, bucket_gens_key_wrong, @@ -1226,7 +1213,7 @@ int bch2_check_alloc_key(struct btree_trans *trans, ret = PTR_ERR_OR_ZERO(g); if (ret) - goto err; + return ret; if (k.k->type == KEY_TYPE_bucket_gens) { bkey_reassemble(&g->k_i, k); @@ -1239,13 +1226,9 @@ int bch2_check_alloc_key(struct btree_trans *trans, ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0); if (ret) - goto err; + return ret; } -out: -err: fsck_err: - bch2_dev_put(ca); - printbuf_exit(&buf); return ret; } @@ -1257,7 +1240,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, struct btree_iter *freespace_iter) { struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; if (!ca->mi.freespace_initialized) @@ -1268,7 +1251,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, k = bch2_btree_iter_peek_slot(trans, freespace_iter); ret = bkey_err(k); if (ret) - goto err; + return ret; *end = bkey_min(k.k->p, *end); @@ -1281,10 +1264,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, end->offset)) { struct bkey_i *update = bch2_trans_kmalloc(trans, sizeof(*update)); - ret = PTR_ERR_OR_ZERO(update); if (ret) - goto err; + return ret; bkey_init(&update->k); update->k.type = KEY_TYPE_set; @@ -1295,11 +1277,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans, ret = bch2_trans_update(trans, freespace_iter, update, 0); if (ret) - goto err; + return ret; } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -1310,7 +1290,7 @@ int 
bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, struct btree_iter *bucket_gens_iter) { struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned i, gens_offset, gens_end_offset; int ret; @@ -1319,7 +1299,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter); ret = bkey_err(k); if (ret) - goto err; + return ret; if (bkey_cmp(alloc_gens_pos(start, &gens_offset), alloc_gens_pos(*end, &gens_end_offset))) @@ -1345,23 +1325,20 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, if (need_update) { struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(u); if (ret) - goto err; + return ret; memcpy(u, &g, sizeof(g)); ret = bch2_trans_update(trans, bucket_gens_iter, u, 0); if (ret) - goto err; + return ret; } } *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -1404,7 +1381,7 @@ int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_i enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard ? 
BCH_DATA_need_discard : BCH_DATA_free; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool async_repair = fsck_flags & FSCK_ERR_NO_LOG; fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE; @@ -1456,7 +1433,6 @@ out: fsck_err: bch2_set_btree_iter_dontneed(trans, &alloc_iter); bch2_trans_iter_exit(trans, &alloc_iter); - printbuf_exit(&buf); return ret; delete: if (!async_repair) { @@ -1513,19 +1489,19 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; u64 b; bool need_update = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; BUG_ON(k.k->type != KEY_TYPE_bucket_gens); bkey_reassemble(&g.k_i, k); - struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode); + CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode); if (!ca) { if (fsck_err(trans, bucket_gens_to_invalid_dev, "bucket_gens key for invalid device:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) - ret = bch2_btree_delete_at(trans, iter, 0); - goto out; + return bch2_btree_delete_at(trans, iter, 0); + return 0; } if (fsck_err_on(end <= ca->mi.first_bucket || @@ -1533,8 +1509,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, trans, bucket_gens_to_invalid_buckets, "bucket_gens key for invalid buckets:\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch2_btree_delete_at(trans, iter, 0); - goto out; + return bch2_btree_delete_at(trans, iter, 0); } for (b = start; b < ca->mi.first_bucket; b++) @@ -1555,30 +1530,26 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans, if (need_update) { struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); - ret = PTR_ERR_OR_ZERO(u); if (ret) - goto out; + return ret; memcpy(u, &g, sizeof(g)); - ret = bch2_trans_update(trans, iter, u, 0); + return bch2_trans_update(trans, iter, u, 0); } -out: fsck_err: - bch2_dev_put(ca); - printbuf_exit(&buf); return ret; } int bch2_check_alloc_info(struct bch_fs *c) { - struct 
btree_trans *trans = bch2_trans_get(c); struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; struct bch_dev *ca = NULL; struct bkey hole; struct bkey_s_c k; int ret = 0; + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch); bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, @@ -1646,14 +1617,14 @@ bkey_err: ca = NULL; if (ret < 0) - goto err; + return ret; ret = for_each_btree_key(trans, iter, BTREE_ID_need_discard, POS_MIN, BTREE_ITER_prefetch, k, bch2_check_discard_freespace_key(trans, &iter)); if (ret) - goto err; + return ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, BTREE_ITER_prefetch); @@ -1670,11 +1641,9 @@ bkey_err: continue; } if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); - bch_err(c, "while checking %s", buf.buf); - printbuf_exit(&buf); break; } @@ -1682,16 +1651,14 @@ bkey_err: } bch2_trans_iter_exit(trans, &iter); if (ret) - goto err; + return ret; ret = for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_bucket_gens_key(trans, &iter, k)); -err: - bch2_trans_put(trans); - bch_err_fn(c, ret); + return ret; } @@ -1703,7 +1670,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, struct bch_alloc_v4 a_convert; const struct bch_alloc_v4 *a; struct bkey_s_c alloc_k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; alloc_k = bch2_btree_iter_peek(trans, alloc_iter); @@ -1714,7 +1681,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) return ret; - struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); + CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode); if (!ca) return 0; @@ -1726,95 +1693,80 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, bucket_to_u64(alloc_k.k->p), lru_idx, 
alloc_k, last_flushed); if (ret) - goto err; + return ret; } - if (a->data_type != BCH_DATA_cached) - goto err; + if (a->data_type == BCH_DATA_cached) { + if (fsck_err_on(!a->io_time[READ], + trans, alloc_key_cached_but_read_time_zero, + "cached bucket with read_time 0\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { + struct bkey_i_alloc_v4 *a_mut = + bch2_alloc_to_v4_mut(trans, alloc_k); + ret = PTR_ERR_OR_ZERO(a_mut); + if (ret) + return ret; - if (fsck_err_on(!a->io_time[READ], - trans, alloc_key_cached_but_read_time_zero, - "cached bucket with read_time 0\n%s", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { - struct bkey_i_alloc_v4 *a_mut = - bch2_alloc_to_v4_mut(trans, alloc_k); - ret = PTR_ERR_OR_ZERO(a_mut); - if (ret) - goto err; + a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); + ret = bch2_trans_update(trans, alloc_iter, + &a_mut->k_i, BTREE_TRIGGER_norun); + if (ret) + return ret; - a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); - ret = bch2_trans_update(trans, alloc_iter, - &a_mut->k_i, BTREE_TRIGGER_norun); - if (ret) - goto err; + a = &a_mut->v; + } - a = &a_mut->v; + ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, + bucket_to_u64(alloc_k.k->p), + a->io_time[READ], + alloc_k, last_flushed); } - - ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, - bucket_to_u64(alloc_k.k->p), - a->io_time[READ], - alloc_k, last_flushed); - if (ret) - goto err; -err: fsck_err: - bch2_dev_put(ca); - printbuf_exit(&buf); return ret; } int bch2_check_alloc_to_lru_refs(struct bch_fs *c) { struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_alloc_to_lru_ref(trans, 
&iter, &last_flushed))) ?: - bch2_check_stripe_to_lru_refs(c); + bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)) ?: + bch2_check_stripe_to_lru_refs(trans); bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); return ret; } static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) { struct bch_fs *c = ca->fs; - int ret; - mutex_lock(&ca->discard_buckets_in_flight_lock); + guard(mutex)(&ca->discard_buckets_in_flight_lock); struct discard_in_flight *i = darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket); - if (i) { - ret = bch_err_throw(c, EEXIST_discard_in_flight_add); - goto out; - } + if (i) + return bch_err_throw(c, EEXIST_discard_in_flight_add); - ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { + return darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { .in_progress = in_progress, .bucket = bucket, })); -out: - mutex_unlock(&ca->discard_buckets_in_flight_lock); - return ret; } static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) { - mutex_lock(&ca->discard_buckets_in_flight_lock); + guard(mutex)(&ca->discard_buckets_in_flight_lock); struct discard_in_flight *i = darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket); BUG_ON(!i || !i->in_progress); darray_remove_item(&ca->discard_buckets_in_flight, i); - mutex_unlock(&ca->discard_buckets_in_flight_lock); } struct discard_buckets_state { @@ -1836,7 +1788,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, struct btree_iter iter = {}; struct bkey_s_c k; struct bkey_i_alloc_v4 *a; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool discard_locked = false; int ret = 0; @@ -1927,7 +1879,6 @@ fsck_err: if (!ret) s->seen++; bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -2024,17 +1975,16 @@ static void bch2_do_discards_fast_work(struct work_struct *work) bool got_bucket = false; u64 bucket; - 
mutex_lock(&ca->discard_buckets_in_flight_lock); - darray_for_each(ca->discard_buckets_in_flight, i) { - if (i->in_progress) - continue; + scoped_guard(mutex, &ca->discard_buckets_in_flight_lock) + darray_for_each(ca->discard_buckets_in_flight, i) { + if (i->in_progress) + continue; - got_bucket = true; - bucket = i->bucket; - i->in_progress = true; - break; - } - mutex_unlock(&ca->discard_buckets_in_flight_lock); + got_bucket = true; + bucket = i->bucket; + i->in_progress = true; + break; + } if (!got_bucket) break; @@ -2142,7 +2092,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, s64 *nr_to_invalidate) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); struct btree_iter alloc_iter = {}; int ret = 0; @@ -2203,7 +2153,6 @@ static int invalidate_one_bucket(struct btree_trans *trans, out: fsck_err: bch2_trans_iter_exit(trans, &alloc_iter); - printbuf_exit(&buf); return ret; } @@ -2226,7 +2175,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) { struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work); struct bch_fs *c = ca->fs; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); int ret = 0; struct bkey_buf last_flushed; @@ -2268,7 +2217,6 @@ restart_err: } bch2_trans_iter_exit(trans, &iter); err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&last_flushed, c); enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates); enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate); @@ -2301,18 +2249,17 @@ void bch2_do_invalidates(struct bch_fs *c) int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, u64 bucket_start, u64 bucket_end) { - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; struct bkey hole; struct bpos end = POS(ca->dev_idx, bucket_end); - struct bch_member *m; unsigned long last_updated = jiffies; int ret; 
BUG_ON(bucket_start > bucket_end); BUG_ON(bucket_end > ca->mi.nbuckets); + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)), BTREE_ITER_prefetch); @@ -2383,17 +2330,16 @@ bkey_err: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); if (ret < 0) { bch_err_msg(ca, ret, "initializing free space"); return ret; } - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true); + } return 0; } @@ -2403,7 +2349,6 @@ int bch2_fs_freespace_init(struct bch_fs *c) if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) return 0; - /* * We can crash during the device add path, so we need to check this on * every mount: @@ -2428,9 +2373,8 @@ int bch2_fs_freespace_init(struct bch_fs *c) } if (doing_init) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bch2_write_super(c); - mutex_unlock(&c->sb_lock); bch_verbose(c, "done initializing freespace"); } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 23a9fbb36f49..fd1415524e46 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -106,20 +106,20 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) return; } - spin_lock(&ob->lock); - ob->valid = false; - ob->data_type = 0; - spin_unlock(&ob->lock); + scoped_guard(spinlock, &ob->lock) { + ob->valid = false; + ob->data_type = 0; + } - spin_lock(&c->freelist_lock); - bch2_open_bucket_hash_remove(c, ob); + scoped_guard(spinlock, &c->freelist_lock) { + bch2_open_bucket_hash_remove(c, ob); - ob->freelist = c->open_buckets_freelist; - c->open_buckets_freelist = ob - c->open_buckets; + ob->freelist = c->open_buckets_freelist; + 
c->open_buckets_freelist = ob - c->open_buckets; - c->open_buckets_nr_free++; - ca->nr_open_buckets--; - spin_unlock(&c->freelist_lock); + c->open_buckets_nr_free++; + ca->nr_open_buckets--; + } closure_wake_up(&c->open_buckets_wait); } @@ -164,14 +164,14 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob) BUG_ON(c->open_buckets_partial_nr >= ARRAY_SIZE(c->open_buckets_partial)); - spin_lock(&c->freelist_lock); - scoped_guard(rcu) + scoped_guard(spinlock, &c->freelist_lock) { + guard(rcu)(); bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++; - ob->on_partial_list = true; - c->open_buckets_partial[c->open_buckets_partial_nr++] = - ob - c->open_buckets; - spin_unlock(&c->freelist_lock); + ob->on_partial_list = true; + c->open_buckets_partial[c->open_buckets_partial_nr++] = + ob - c->open_buckets; + } closure_wake_up(&c->open_buckets_wait); closure_wake_up(&c->freelist_wait); @@ -219,33 +219,31 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, return NULL; } - spin_lock(&c->freelist_lock); + guard(spinlock)(&c->freelist_lock); if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) { if (cl) closure_wait(&c->open_buckets_wait, cl); track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true); - spin_unlock(&c->freelist_lock); return ERR_PTR(bch_err_throw(c, open_buckets_empty)); } /* Recheck under lock: */ if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) { - spin_unlock(&c->freelist_lock); req->counters.skipped_open++; return NULL; } struct open_bucket *ob = bch2_open_bucket_alloc(c); - spin_lock(&ob->lock); - ob->valid = true; - ob->sectors_free = ca->mi.bucket_size; - ob->dev = ca->dev_idx; - ob->gen = gen; - ob->bucket = bucket; - spin_unlock(&ob->lock); + scoped_guard(spinlock, &ob->lock) { + ob->valid = true; + ob->sectors_free = ca->mi.bucket_size; + ob->dev = ca->dev_idx; + ob->gen = gen; + ob->bucket = bucket; + } ca->nr_open_buckets++; bch2_open_bucket_hash_add(c, ob); @@ 
-253,7 +251,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false); track_event_change(&c->times[BCH_TIME_blocked_allocate], false); - spin_unlock(&c->freelist_lock); return ob; } @@ -453,7 +450,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct closure *cl, struct open_bucket *ob) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); printbuf_tabstop_push(&buf, 24); @@ -480,8 +477,6 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, prt_printf(&buf, "err\t%s\n", bch2_err_str(PTR_ERR(ob))); trace_bucket_alloc_fail(c, buf.buf); } - - printbuf_exit(&buf); } /** @@ -511,7 +506,8 @@ again: bch2_dev_usage_read_fast(ca, &req->usage); avail = dev_buckets_free(ca, req->usage, req->watermark); - if (req->usage.buckets[BCH_DATA_need_discard] > avail) + if (req->usage.buckets[BCH_DATA_need_discard] > + min(avail, ca->mi.nbuckets >> 7)) bch2_dev_do_discards(ca); if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail) @@ -588,7 +584,8 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, .ca = ca, }; - bch2_trans_do(c, + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false))); return ob; } @@ -847,17 +844,15 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c, static int bucket_alloc_set_partial(struct bch_fs *c, struct alloc_request *req) { - int i, ret = 0; - if (!c->open_buckets_partial_nr) return 0; - spin_lock(&c->freelist_lock); + guard(spinlock)(&c->freelist_lock); if (!c->open_buckets_partial_nr) - goto unlock; + return 0; - for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) { + for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) { struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i]; if (want_bucket(c, req, ob)) { @@ -877,14 +872,13 @@ static int bucket_alloc_set_partial(struct bch_fs *c, scoped_guard(rcu) 
bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - ret = add_new_bucket(c, req, ob); + int ret = add_new_bucket(c, req, ob); if (ret) - break; + return ret; } } -unlock: - spin_unlock(&c->freelist_lock); - return ret; + + return 0; } static int __open_bucket_add_buckets(struct btree_trans *trans, @@ -980,23 +974,18 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c, return ob->ec != NULL; } else if (ca) { bool drop = ob->dev == ca->dev_idx; - struct open_bucket *ob2; - unsigned i; if (!drop && ob->ec) { - unsigned nr_blocks; + guard(mutex)(&ob->ec->lock); + unsigned nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; - mutex_lock(&ob->ec->lock); - nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks; - - for (i = 0; i < nr_blocks; i++) { + for (unsigned i = 0; i < nr_blocks; i++) { if (!ob->ec->blocks[i]) continue; - ob2 = c->open_buckets + ob->ec->blocks[i]; + struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i]; drop |= ob2->dev == ca->dev_idx; } - mutex_unlock(&ob->ec->lock); } return drop; @@ -1012,14 +1001,13 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca, struct open_bucket *ob; unsigned i; - mutex_lock(&wp->lock); + guard(mutex)(&wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) if (should_drop_bucket(ob, c, ca, ec)) bch2_open_bucket_put(c, ob); else ob_push(c, &ptrs, ob); wp->ptrs = ptrs; - mutex_unlock(&wp->lock); } void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, @@ -1035,39 +1023,37 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca, bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point); bch2_writepoint_stop(c, ca, ec, &c->btree_write_point); - mutex_lock(&c->btree_reserve_cache_lock); - while (c->btree_reserve_cache_nr) { - struct btree_alloc *a = - &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; + scoped_guard(mutex, &c->btree_reserve_cache_lock) + while (c->btree_reserve_cache_nr) { + struct btree_alloc *a = + 
&c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - bch2_open_buckets_put(c, &a->ob); - } - mutex_unlock(&c->btree_reserve_cache_lock); + bch2_open_buckets_put(c, &a->ob); + } - spin_lock(&c->freelist_lock); i = 0; - while (i < c->open_buckets_partial_nr) { - struct open_bucket *ob = - c->open_buckets + c->open_buckets_partial[i]; - - if (should_drop_bucket(ob, c, ca, ec)) { - --c->open_buckets_partial_nr; - swap(c->open_buckets_partial[i], - c->open_buckets_partial[c->open_buckets_partial_nr]); - - ob->on_partial_list = false; - - scoped_guard(rcu) - bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; - - spin_unlock(&c->freelist_lock); - bch2_open_bucket_put(c, ob); - spin_lock(&c->freelist_lock); - } else { - i++; + scoped_guard(spinlock, &c->freelist_lock) + while (i < c->open_buckets_partial_nr) { + struct open_bucket *ob = + c->open_buckets + c->open_buckets_partial[i]; + + if (should_drop_bucket(ob, c, ca, ec)) { + --c->open_buckets_partial_nr; + swap(c->open_buckets_partial[i], + c->open_buckets_partial[c->open_buckets_partial_nr]); + + ob->on_partial_list = false; + + scoped_guard(rcu) + bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--; + + spin_unlock(&c->freelist_lock); + bch2_open_bucket_put(c, ob); + spin_lock(&c->freelist_lock); + } else { + i++; + } } - } - spin_unlock(&c->freelist_lock); bch2_ec_stop_dev(c, ca); } @@ -1121,22 +1107,17 @@ static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigne struct open_bucket *ob; unsigned i; - mutex_lock(&c->write_points_hash_lock); - if (c->write_points_nr < old_nr) { - mutex_unlock(&c->write_points_hash_lock); - return true; - } - - if (c->write_points_nr == 1 || - !too_many_writepoints(c, 8)) { - mutex_unlock(&c->write_points_hash_lock); - return false; - } + scoped_guard(mutex, &c->write_points_hash_lock) { + if (c->write_points_nr < old_nr) + return true; - wp = c->write_points + --c->write_points_nr; + if (c->write_points_nr == 1 || + !too_many_writepoints(c, 8)) + return false; - 
hlist_del_rcu(&wp->node); - mutex_unlock(&c->write_points_hash_lock); + wp = c->write_points + --c->write_points_nr; + hlist_del_rcu(&wp->node); + } bch2_trans_mutex_lock_norelock(trans, &wp->lock); open_bucket_for_each(c, &wp->ptrs, ob, i) @@ -1470,35 +1451,25 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca) { - struct open_bucket *ob; - - out->atomic++; + guard(printbuf_atomic)(out); - for (ob = c->open_buckets; + for (struct open_bucket *ob = c->open_buckets; ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { - spin_lock(&ob->lock); + guard(spinlock)(&ob->lock); if (ob->valid && (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); - spin_unlock(&ob->lock); } - - --out->atomic; } void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c) { - unsigned i; - - out->atomic++; - spin_lock(&c->freelist_lock); + guard(printbuf_atomic)(out); + guard(spinlock)(&c->freelist_lock); - for (i = 0; i < c->open_buckets_partial_nr; i++) + for (unsigned i = 0; i < c->open_buckets_partial_nr; i++) bch2_open_bucket_to_text(out, c, c->open_buckets + c->open_buckets_partial[i]); - - spin_unlock(&c->freelist_lock); - --out->atomic; } static const char * const bch2_write_point_states[] = { @@ -1514,7 +1485,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, struct open_bucket *ob; unsigned i; - mutex_lock(&wp->lock); + guard(mutex)(&wp->lock); prt_printf(out, "%lu: ", wp->write_point); prt_human_readable_u64(out, wp->sectors_allocated << 9); @@ -1533,8 +1504,6 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, open_bucket_for_each(c, &wp->ptrs, ob, i) bch2_open_bucket_to_text(out, c, ob); printbuf_indent_sub(out, 2); - - mutex_unlock(&wp->lock); } void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c) @@ -1621,7 +1590,7 @@ void 
bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", c->opts.allocator_stuck_timeout); @@ -1634,8 +1603,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) bch2_printbuf_make_room(&buf, 4096); - buf.atomic++; - scoped_guard(rcu) + scoped_guard(rcu) { + guard(printbuf_atomic)(&buf); for_each_online_member_rcu(c, ca) { prt_printf(&buf, "Dev %u:\n", ca->dev_idx); printbuf_indent_add(&buf, 2); @@ -1643,7 +1612,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) printbuf_indent_sub(&buf, 2); prt_newline(&buf); } - --buf.atomic; + } prt_printf(&buf, "Copygc debug:\n"); printbuf_indent_add(&buf, 2); @@ -1657,7 +1626,6 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c) printbuf_indent_sub(&buf, 2); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } static inline unsigned allocator_wait_timeout(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 1b3fc8460096..02aef66859c3 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -210,16 +210,11 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket) { - bool ret; - if (bch2_bucket_is_open(c, dev, bucket)) return true; - spin_lock(&c->freelist_lock); - ret = bch2_bucket_is_open(c, dev, bucket); - spin_unlock(&c->freelist_lock); - - return ret; + guard(spinlock)(&c->freelist_lock); + return bch2_bucket_is_open(c, dev, bucket); } enum bch_write_flags; diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c index a7cd1f0f0964..ad04e5f0f056 100644 --- a/fs/bcachefs/async_objs.c +++ b/fs/bcachefs/async_objs.c @@ -13,28 +13,38 @@ #include <linux/debugfs.h> -static 
void promote_obj_to_text(struct printbuf *out, void *obj) +static void promote_obj_to_text(struct printbuf *out, + struct bch_fs *c, + void *obj) { - bch2_promote_op_to_text(out, obj); + bch2_promote_op_to_text(out, c, obj); } -static void rbio_obj_to_text(struct printbuf *out, void *obj) +static void rbio_obj_to_text(struct printbuf *out, + struct bch_fs *c, + void *obj) { - bch2_read_bio_to_text(out, obj); + bch2_read_bio_to_text(out, c, obj); } -static void write_op_obj_to_text(struct printbuf *out, void *obj) +static void write_op_obj_to_text(struct printbuf *out, + struct bch_fs *c, + void *obj) { bch2_write_op_to_text(out, obj); } -static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj) +static void btree_read_bio_obj_to_text(struct printbuf *out, + struct bch_fs *c, + void *obj) { struct btree_read_bio *rbio = obj; bch2_btree_read_bio_to_text(out, rbio); } -static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj) +static void btree_write_bio_obj_to_text(struct printbuf *out, + struct bch_fs *c, + void *obj) { struct btree_write_bio *wbio = obj; bch2_bio_to_text(out, &wbio->wbio.bio); @@ -79,13 +89,12 @@ static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf, if (!i->size) break; - list->obj_to_text(&i->buf, obj); + list->obj_to_text(&i->buf, i->c, obj); + i->iter = iter.pos; } if (i->buf.allocation_failure) ret = -ENOMEM; - else - i->iter = iter.pos; if (!ret) ret = bch2_debugfs_flush_buf(i); diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h index cd6489b8cf76..451db4c51fb2 100644 --- a/fs/bcachefs/async_objs.h +++ b/fs/bcachefs/async_objs.h @@ -3,9 +3,10 @@ #define _BCACHEFS_ASYNC_OBJS_H #ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS -static inline void __async_object_list_del(struct fast_list *head, unsigned idx) +static inline void __async_object_list_del(struct fast_list *head, unsigned *idx) { - fast_list_remove(head, idx); + fast_list_remove(head, *idx); + *idx = 0; } static inline int 
__async_object_list_add(struct fast_list *head, void *obj, unsigned *idx) @@ -16,7 +17,7 @@ static inline int __async_object_list_add(struct fast_list *head, void *obj, uns } #define async_object_list_del(_c, _list, idx) \ - __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx) + __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, &idx) #define async_object_list_add(_c, _list, obj, idx) \ __async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx) diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h index 8d713c0f5841..ed262c874ad0 100644 --- a/fs/bcachefs/async_objs_types.h +++ b/fs/bcachefs/async_objs_types.h @@ -18,7 +18,7 @@ enum bch_async_obj_lists { struct async_obj_list { struct fast_list list; - void (*obj_to_text)(struct printbuf *, void *); + void (*obj_to_text)(struct printbuf *, struct bch_fs *, void *); unsigned idx; }; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 77d93beb3c8f..bd26ab3e6812 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -108,7 +108,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, bool insert) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool will_check = c->recovery.passes_to_run & BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); int ret = 0; @@ -144,8 +144,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, if (!will_check && __bch2_inconsistent_error(c, &buf)) ret = bch_err_throw(c, erofs_unfixed_errors); - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); + if (buf.buf) + bch_err(c, "%s", buf.buf); return ret; } @@ -208,7 +208,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, bool commit) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; /* @@ -244,7 +244,7 @@ static int 
backpointer_target_not_found(struct btree_trans *trans, "%s", buf.buf)) { ret = bch2_backpointer_del(trans, bp.k->p); if (ret || !commit) - goto out; + return ret; /* * Normally, on transaction commit from inside a transaction, @@ -262,9 +262,7 @@ static int backpointer_target_not_found(struct btree_trans *trans, */ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } -out: fsck_err: - printbuf_exit(&buf); return ret; } @@ -388,7 +386,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st struct bch_fs *c = trans->c; struct btree_iter alloc_iter = {}; struct bkey_s_c alloc_k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; struct bpos bucket; @@ -423,7 +421,6 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st out: fsck_err: bch2_trans_iter_exit(trans, &alloc_iter); - printbuf_exit(&buf); return ret; } @@ -434,14 +431,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); + bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)); bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); return ret; } @@ -471,7 +467,7 @@ static int check_extent_checksum(struct btree_trans *trans, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); const union bch_extent_entry *entry; struct extent_ptr_decoded p; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); void *data_buf = NULL; struct bio *bio = NULL; size_t bytes; @@ -530,7 +526,6 @@ err: kvfree(data_buf); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_check_extent_checksums); - printbuf_exit(&buf); return ret; } @@ -541,7 
+536,7 @@ static int check_bp_exists(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter other_extent_iter = {}; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (bpos_lt(bp->k.p, s->bp_start) || bpos_gt(bp->k.p, s->bp_end)) @@ -566,7 +561,6 @@ err: fsck_err: bch2_trans_iter_exit(trans, &other_extent_iter); bch2_trans_iter_exit(trans, &bp_iter); - printbuf_exit(&buf); return ret; check_existing_bp: /* Do we have a backpointer for a different extent? */ @@ -895,7 +889,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b u32 sectors[ALLOC_SECTORS_NR]; memset(sectors, 0, sizeof(sectors)); - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); + CLASS(bch2_dev_bucket_tryget_noerror, ca)(trans->c, alloc_k.k->p); if (!ca) return 0; @@ -932,12 +926,12 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b }; bch2_trans_iter_exit(trans, &iter); if (ret) - goto err; + return ret; if (need_commit) { ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) - goto err; + return ret; } if (sectors[ALLOC_dirty] != a->dirty_sectors || @@ -946,15 +940,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); if (ret) - goto err; + return ret; } if (sectors[ALLOC_dirty] > a->dirty_sectors || sectors[ALLOC_cached] > a->cached_sectors || sectors[ALLOC_stripe] > a->stripe_sectors) { - ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: + return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: bch_err_throw(c, transaction_restart_nested); - goto err; } bool empty = (sectors[ALLOC_dirty] + @@ -970,9 +963,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b *had_mismatch = true; } -err: - 
bch2_dev_put(ca); - return ret; + + return 0; } static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) @@ -1107,7 +1099,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) { int ret = 0; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; bch2_bkey_buf_init(&s.last_flushed); @@ -1146,7 +1138,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (!bpos_eq(s.bp_start, POS_MIN) || !bpos_eq(s.bp_end, SPOS_MAX)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "check_extents_to_backpointers(): "); bch2_bpos_to_text(&buf, s.bp_start); @@ -1154,7 +1146,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) bch2_bpos_to_text(&buf, s.bp_end); bch_verbose(c, "%s", buf.buf); - printbuf_exit(&buf); } ret = bch2_check_extents_to_backpointers_pass(trans, &s); @@ -1169,11 +1160,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); } err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&s.last_flushed, c); bch2_btree_cache_unpin(c); - - bch_err_fn(c, ret); return ret; } @@ -1211,7 +1199,7 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, u64 nr = ca->bucket_backpointer_mismatch.nr; u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); __bch2_log_msg_start(ca->name, &buf); prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n", @@ -1222,7 +1210,6 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, nr < allowed ? 
RUN_RECOVERY_PASS_ratelimit : 0); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return 0; } @@ -1299,7 +1286,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, int bch2_check_backpointers_to_extents(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; int ret; @@ -1319,7 +1306,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) if (bbpos_cmp(start, BBPOS_MIN) || bbpos_cmp(end, BBPOS_MAX)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "check_backpointers_to_extents(): "); bch2_bbpos_to_text(&buf, start); @@ -1327,7 +1314,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) bch2_bbpos_to_text(&buf, end); bch_verbose(c, "%s", buf.buf); - printbuf_exit(&buf); } ret = bch2_check_backpointers_to_extents_pass(trans, start, end); @@ -1336,11 +1322,8 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) start = bbpos_successor(end); } - bch2_trans_put(trans); bch2_btree_cache_unpin(c); - - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index fb3156ed7f0b..8a6f886b5bf2 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -329,19 +329,21 @@ do { \ bch2_print_str(_c, __VA_ARGS__); \ } while (0) -#define bch_info(c, fmt, ...) \ - bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -#define bch_info_ratelimited(c, fmt, ...) \ - bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) -#define bch_notice(c, fmt, ...) \ - bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) -#define bch_warn(c, fmt, ...) \ - bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) -#define bch_warn_ratelimited(c, fmt, ...) \ - bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__) - -#define bch_err(c, fmt, ...) 
\ - bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch_log(c, loglevel, fmt, ...) \ + bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch_log_ratelimited(c, loglevel, fmt, ...) \ + bch2_print_ratelimited(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__) + +#define bch_err(c, ...) bch_log(c, KERN_ERR, __VA_ARGS__) +#define bch_err_ratelimited(c, ...) bch_log_ratelimited(c, KERN_ERR, __VA_ARGS__) +#define bch_warn(c, ...) bch_log(c, KERN_WARNING, __VA_ARGS__) +#define bch_warn_ratelimited(c, ...) bch_log_ratelimited(c, KERN_WARNING, __VA_ARGS__) +#define bch_notice(c, ...) bch_log(c, KERN_NOTICE, __VA_ARGS__) +#define bch_info(c, ...) bch_log(c, KERN_INFO, __VA_ARGS__) +#define bch_info_ratelimited(c, ...) bch_log_ratelimited(c, KERN_INFO, __VA_ARGS__) +#define bch_verbose(c, ...) bch_log(c, KERN_DEBUG, __VA_ARGS__) +#define bch_verbose_ratelimited(c, ...) bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__) + #define bch_err_dev(ca, fmt, ...) \ bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) #define bch_err_dev_offset(ca, _offset, fmt, ...) \ @@ -351,8 +353,6 @@ do { \ #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \ bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__) -#define bch_err_ratelimited(c, fmt, ...) \ - bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_err_dev_ratelimited(ca, fmt, ...) \ bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__) #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \ @@ -386,24 +386,6 @@ do { \ ##__VA_ARGS__, bch2_err_str(_ret)); \ } while (0) -#define bch_verbose(c, fmt, ...) \ -do { \ - if ((c)->opts.verbose) \ - bch_info(c, fmt, ##__VA_ARGS__); \ -} while (0) - -#define bch_verbose_ratelimited(c, fmt, ...) \ -do { \ - if ((c)->opts.verbose) \ - bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_verbose_init(opts, fmt, ...) 
\ -do { \ - if (opt_get(opts, verbose)) \ - pr_info(fmt, ##__VA_ARGS__); \ -} while (0) - static inline int __bch2_err_trace(struct bch_fs *c, int err) { trace_error_throw(c, err, _THIS_IP_); @@ -833,6 +815,8 @@ struct bch_fs { struct bch_disk_groups_cpu __rcu *disk_groups; struct bch_opts opts; + unsigned loglevel; + unsigned prev_loglevel; /* Updated by bch2_sb_update():*/ struct { @@ -1181,7 +1165,7 @@ static inline bool bch2_ro_ref_tryget(struct bch_fs *c) static inline void bch2_ro_ref_put(struct bch_fs *c) { - if (refcount_dec_and_test(&c->ro_ref)) + if (c && refcount_dec_and_test(&c->ro_ref)) wake_up(&c->ro_ref_wait); } diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index ee823c640642..67e39f835b96 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -624,10 +624,8 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s) } if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf)); - printbuf_exit(&buf); } return ret; diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 32841f762eb2..72698c0d9f0e 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -58,7 +58,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, struct bkey_packed *_k, *_n; struct bkey uk, n; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (!i->u64s) return; @@ -97,8 +97,6 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b, if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p)) printk(KERN_ERR "Duplicate keys\n"); } - - printbuf_exit(&buf); } void bch2_dump_btree_node(struct bch_fs *c, struct btree *b) @@ -113,7 +111,7 @@ void bch2_dump_btree_node_iter(struct btree *b, struct btree_node_iter *iter) { struct btree_node_iter_set *set; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); printk(KERN_ERR "btree node iter with %u/%u sets:\n", __btree_node_iter_used(iter), b->nsets); @@ -128,8 
+126,6 @@ void bch2_dump_btree_node_iter(struct btree *b, printk(KERN_ERR "set %zu key %u: %s\n", t - b->set, set->k, buf.buf); } - - printbuf_exit(&buf); } struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b) @@ -362,27 +358,6 @@ static struct bkey_float *bkey_float(const struct btree *b, return ro_aux_tree_base(b, t)->f + idx; } -static void __bset_aux_tree_verify(struct btree *b) -{ - for_each_bset(b, t) { - if (t->aux_data_offset == U16_MAX) - continue; - - BUG_ON(t != b->set && - t[-1].aux_data_offset == U16_MAX); - - BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); - BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); - BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); - } -} - -static inline void bset_aux_tree_verify(struct btree *b) -{ - if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) - __bset_aux_tree_verify(b); -} - void bch2_btree_keys_init(struct btree *b) { unsigned i; @@ -538,6 +513,51 @@ static inline void bch2_bset_verify_rw_aux_tree(struct btree *b, __bch2_bset_verify_rw_aux_tree(b, t); } +static void __bset_aux_tree_verify_ro(struct btree *b, struct bset_tree *t) +{ + struct bkey_packed *k = btree_bkey_first(b, t); + + eytzinger1_for_each(j, t->size - 1) { + while (tree_to_bkey(b, t, j) > k && + k != btree_bkey_last(b, t)) + k = bkey_p_next(k); + + BUG_ON(tree_to_bkey(b, t, j) != k); + } +} + +static void __bset_aux_tree_verify(struct btree *b) +{ + for_each_bset(b, t) { + if (t->aux_data_offset == U16_MAX) + continue; + + BUG_ON(t != b->set && + t[-1].aux_data_offset == U16_MAX); + + BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t)); + BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b)); + BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b)); + + switch (bset_aux_tree_type(t)) { + case BSET_RO_AUX_TREE: + __bset_aux_tree_verify_ro(b, t); + break; + case BSET_RW_AUX_TREE: + __bch2_bset_verify_rw_aux_tree(b, t); + break; + default: + break; + } + } +} + +static inline void 
bset_aux_tree_verify(struct btree *b) +{ + if (static_branch_unlikely(&bch2_debug_check_bset_lookups)) + __bset_aux_tree_verify(b); +} + /* returns idx of first entry >= offset: */ static unsigned rw_aux_tree_bsearch(struct btree *b, struct bset_tree *t, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 49505653fe12..23ed7393f07f 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -78,9 +78,8 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; - mutex_lock(&bc->lock); - __bch2_btree_node_to_freelist(bc, b); - mutex_unlock(&bc->lock); + scoped_guard(mutex, &bc->lock) + __bch2_btree_node_to_freelist(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -215,14 +214,13 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; - mutex_lock(&bc->lock); + guard(mutex)(&bc->lock); if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); list_move(&b->list, &bc->live[1].list); bc->live[0].nr--; bc->live[1].nr++; } - mutex_unlock(&bc->lock); } void bch2_btree_cache_unpin(struct bch_fs *c) @@ -230,7 +228,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c) struct btree_cache *bc = &c->btree_cache; struct btree *b, *n; - mutex_lock(&bc->lock); + guard(mutex)(&bc->lock); c->btree_cache.pinned_nodes_mask[0] = 0; c->btree_cache.pinned_nodes_mask[1] = 0; @@ -240,8 +238,6 @@ void bch2_btree_cache_unpin(struct bch_fs *c) bc->live[0].nr++; bc->live[1].nr--; } - - mutex_unlock(&bc->lock); } /* Btree in memory cache - hash table */ @@ -296,11 +292,8 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, b->c.level = level; b->c.btree_id = id; - mutex_lock(&bc->lock); - int ret = __bch2_btree_node_hash_insert(bc, b); - mutex_unlock(&bc->lock); - - return ret; + guard(mutex)(&bc->lock); + return __bch2_btree_node_hash_insert(bc, b); } void 
bch2_btree_node_update_key_early(struct btree_trans *trans, @@ -317,7 +310,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); if (!IS_ERR_OR_NULL(b)) { - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); __bch2_btree_node_hash_remove(&c->btree_cache, b); @@ -325,7 +318,6 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans, ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); BUG_ON(ret); - mutex_unlock(&c->btree_cache.lock); six_unlock_read(&b->c.lock); } @@ -930,20 +922,18 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, } if (unlikely(!bkey_is_btree_ptr(&k->k))) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); - printbuf_exit(&buf); return ERR_PTR(ret); } if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); - printbuf_exit(&buf); return ERR_PTR(ret); } @@ -1018,11 +1008,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { - struct printbuf buf = PRINTBUF; - if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations) return; + CLASS(printbuf, buf)(); prt_printf(&buf, "btree node header doesn't match ptr: "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); @@ -1038,8 +1027,6 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) bch2_bpos_to_text(&buf, b->data->max_key); bch2_fs_topology_error(c, "%s", buf.buf); - - printbuf_exit(&buf); } static inline void btree_check_header(struct bch_fs *c, struct 
btree *b) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7269490a5d9a..34cb8a4324dc 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -95,11 +95,10 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k) static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) { - preempt_disable(); + guard(preempt)(); write_seqcount_begin(&c->gc_pos_lock); c->gc_pos = new_pos; write_seqcount_end(&c->gc_pos_lock); - preempt_enable(); } static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos) @@ -138,14 +137,13 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min) int ret; if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_str(&buf, " -> "); bch2_bpos_to_text(&buf, new_min); bch_info(c, "%s(): %s", __func__, buf.buf); - printbuf_exit(&buf); } new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL); @@ -174,14 +172,13 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) int ret; if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_str(&buf, " -> "); bch2_bpos_to_text(&buf, new_max); bch_info(c, "%s(): %s", __func__, buf.buf); - printbuf_exit(&buf); } ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p); @@ -205,13 +202,12 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) bch2_btree_node_drop_keys_outside_node(b); - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); __bch2_btree_node_hash_remove(&c->btree_cache, b); bkey_copy(&b->key, &new->k_i); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); BUG_ON(ret); - mutex_unlock(&c->btree_cache.lock); return 0; } @@ -223,7 +219,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * struct bpos 
expected_start = !prev ? b->data->min_key : bpos_successor(prev->key.k.p); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && @@ -253,7 +249,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * expected_start, bpos_predecessor(cur->data->min_key)); if (ret) - goto err; + return ret; *pulled_from_scan = cur->data->min_key; ret = DID_FILL_FROM_SCAN; @@ -286,9 +282,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * } } } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -296,7 +290,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, struct btree *child, struct bpos *pulled_from_scan) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; if (bpos_eq(child->key.k.p, b->key.k.p)) @@ -317,7 +311,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0, bpos_successor(child->key.k.p), b->key.k.p); if (ret) - goto err; + return ret; *pulled_from_scan = b->key.k.p; ret = DID_FILL_FROM_SCAN; @@ -325,9 +319,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, ret = set_node_max(c, child, b->key.k.p); } } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -340,7 +332,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct struct bkey_buf prev_k, cur_k; struct btree *prev = NULL, *cur = NULL; bool have_child, new_pass = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; if (!b->c.level) @@ -529,7 +521,6 @@ fsck_err: bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -539,7 +530,7 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, { struct bch_fs *c = trans->c; struct btree_root *r = 
bch2_btree_id_root(c, btree); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; bch2_btree_id_to_text(&buf, btree); @@ -568,21 +559,20 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree, bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX); if (ret) - goto err; + return ret; } *reconstructed_root = true; } err: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } int bch2_check_topology(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bpos pulled_from_scan = POS_MIN; int ret = 0; @@ -603,9 +593,8 @@ recover: six_unlock_read(&b->c.lock); if (ret == DROP_THIS_NODE) { - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + bch2_btree_node_hash_remove(&c->btree_cache, b); r->b = NULL; @@ -614,17 +603,15 @@ recover: goto recover; } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_btree_id_to_text(&buf, i); bch_err(c, "empty btree root %s", buf.buf); - printbuf_exit(&buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; } } - bch2_trans_put(trans); return ret; } @@ -651,7 +638,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, struct bkey deleted = KEY(0, 0, 0); struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; deleted.p = k.k->p; @@ -675,10 +662,9 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bch2_dev_btree_bitmap_mark(c, k); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -703,7 +689,6 @@ static int bch2_gc_mark_key(struct 
btree_trans *trans, enum btree_id btree_id, BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags); out: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -771,8 +756,8 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) static int bch2_gc_btrees(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); int ret = 0; struct progress_indicator_state progress; @@ -792,8 +777,6 @@ static int bch2_gc_btrees(struct bch_fs *c) ret = bch2_gc_btree(trans, &progress, btree, true); } - printbuf_exit(&buf); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } @@ -945,16 +928,16 @@ fsck_err: static int bch2_gc_alloc_done(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); int ret = 0; for_each_member_device(c, ca) { - ret = bch2_trans_run(c, - for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, + ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, ca->mi.first_bucket), POS(ca->dev_idx, ca->mi.nbuckets - 1), BTREE_ITER_slots|BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_alloc_write_key(trans, &iter, ca, k))); + bch2_alloc_write_key(trans, &iter, ca, k)); if (ret) { bch2_dev_put(ca); break; @@ -987,7 +970,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); const struct bch_stripe *s; struct gc_stripe *m; bool bad = false; @@ -1032,18 +1015,17 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, ret = bch2_trans_update(trans, iter, &new->k_i, 0); } fsck_err: - printbuf_exit(&buf); return ret; } static int bch2_gc_stripes_done(struct bch_fs *c) { - return bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_prefetch, k, 
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_stripes_key(trans, &iter, k))); + bch2_gc_write_stripes_key(trans, &iter, k)); } /** @@ -1072,8 +1054,8 @@ int bch2_check_allocations(struct bch_fs *c) { int ret; - down_read(&c->state_lock); - down_write(&c->gc_lock); + guard(rwsem_read)(&c->state_lock); + guard(rwsem_write)(&c->gc_lock); bch2_btree_interior_updates_flush(c); @@ -1102,15 +1084,11 @@ int bch2_check_allocations(struct bch_fs *c) bch2_gc_stripes_done(c) ?: bch2_gc_reflink_done(c); out: - percpu_down_write(&c->mark_lock); - /* Indicates that gc is no longer in progress: */ - __gc_pos_set(c, gc_phase(GC_PHASE_not_running)); - - bch2_gc_free(c); - percpu_up_write(&c->mark_lock); - - up_write(&c->gc_lock); - up_read(&c->state_lock); + scoped_guard(percpu_write, &c->mark_lock) { + /* Indicates that gc is no longer in progress: */ + __gc_pos_set(c, gc_phase(GC_PHASE_not_running)); + bch2_gc_free(c); + } /* * At startup, allocations can happen directly instead of via the @@ -1121,7 +1099,6 @@ out: if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags)) bch2_sb_members_clean_deleted(c); - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index b30799e494eb..bd86dd7151a1 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -24,6 +24,7 @@ #include "super-io.h" #include "trace.h" +#include <linux/moduleparam.h> #include <linux/sched/mm.h> #ifdef CONFIG_BCACHEFS_DEBUG @@ -591,7 +592,7 @@ static int __btree_err(int ret, !(test_bit(BCH_FS_in_fsck, &c->flags) && c->opts.fix_errors == FSCK_FIX_ask); - struct printbuf out = PRINTBUF; + CLASS(printbuf, out)(); bch2_log_msg_start(c, &out); if (!print_deferred) @@ -618,13 +619,13 @@ static int __btree_err(int ret, if (!have_retry) ret = bch_err_throw(c, fsck_fix); - goto out; + return ret; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); break; } - goto out; + return ret; } if (rw == WRITE) { @@ -646,16 +647,14 @@ static int 
__btree_err(int ret, if (!have_retry) ret = bch_err_throw(c, fsck_fix); - goto out; + return ret; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); break; } print: bch2_print_str(c, KERN_ERR, out.buf); -out: fsck_err: - printbuf_exit(&out); return ret; } @@ -734,8 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); int ret = 0; btree_err_on(!bch2_version_compatible(version), @@ -754,10 +753,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { if (bch2_version_compatible(version)) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->version_min = cpu_to_le16(version); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } else { /* We have no idea what's going on: */ i->version = cpu_to_le16(c->sb.version); @@ -771,10 +769,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->version = cpu_to_le16(version); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), @@ -874,8 +871,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, &bn->format); } fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -945,7 +940,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; @@ 
-1050,7 +1045,6 @@ got_good_key: set_btree_node_need_rewrite_error(b); } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1069,7 +1063,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0, write = READ; u64 start_time = local_clock(); @@ -1301,9 +1295,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); - if (updated_range) - bch2_btree_node_drop_keys_outside_node(b); - i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; @@ -1341,6 +1332,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); + if (updated_range) + bch2_btree_node_drop_keys_outside_node(b); + /* * XXX: * @@ -1384,7 +1378,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, } fsck_err: mempool_free(iter, &c->fill_iter); - printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return ret; } @@ -1400,7 +1393,7 @@ static void btree_node_read_work(struct work_struct *work) struct bch_io_failures failed = { .nr = 0 }; int ret = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "btree node read error at btree "); @@ -1492,7 +1485,6 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); - printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); @@ -1574,7 +1566,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; - struct printbuf buf = 
PRINTBUF; + CLASS(printbuf, buf)(); bool dump_bset_maps = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; @@ -1683,11 +1675,10 @@ fsck_err: if (ret) { set_btree_node_read_error(b); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_btree_lost_data(c, &buf, b->c.btree_id); if (buf.pos) bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1698,7 +1689,6 @@ fsck_err: closure_debug_destroy(&ra->cl); kfree(ra); - printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); @@ -1818,7 +1808,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, if (ret <= 0) { bool ratelimit = true; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_str(&buf, "btree node read error: no device to read from\n at "); @@ -1835,7 +1825,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, DEFAULT_RATELIMIT_BURST); if (!ratelimit || __ratelimit(&rs)) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); set_btree_node_read_error(b); clear_btree_node_read_in_flight(b); @@ -1917,9 +1906,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + bch2_btree_node_hash_remove(&c->btree_cache, b); ret = bch_err_throw(c, btree_node_read_error); goto err; @@ -1936,7 +1924,8 @@ err: int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { - return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); + CLASS(btree_trans, trans)(c); + return __bch2_btree_root_read(trans, id, k, level); } struct btree_node_scrub { @@ -2015,7 +2004,7 @@ static void 
btree_node_scrub_work(struct work_struct *work) { struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work); struct bch_fs *c = scrub->c; - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level, bkey_i_to_s_c(scrub->key.k)); @@ -2030,7 +2019,6 @@ static void btree_node_scrub_work(struct work_struct *work) bch_err_fn_ratelimited(c, ret); } - printbuf_exit(&err); bch2_bkey_buf_exit(&scrub->key, c);; btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); @@ -2211,7 +2199,8 @@ static void btree_node_write_work(struct work_struct *work) } } else { - ret = bch2_trans_do(c, + CLASS(btree_trans, trans)(c); + ret = lockrestart_do(trans, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| @@ -2230,11 +2219,10 @@ err: set_btree_node_noevict(b); if (!bch2_err_matches(ret, EROFS)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); bch2_btree_pos_to_text(&buf, c, b); bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); } goto out; } @@ -2253,13 +2241,12 @@ static void btree_node_write_endio(struct bio *bio) wbio->submit_time, !bio->bi_status); if (ca && bio->bi_status) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, "btree write error: %s\n ", bch2_blk_status_to_str(bio->bi_status)); bch2_btree_pos_to_text(&buf, c, b); bch_err_dev_ratelimited(ca, "%s", buf.buf); - printbuf_exit(&buf); } if (bio->bi_status) { @@ -2553,9 +2540,14 @@ do_write: } count_event(c, btree_node_write); + /* + * blk-wbt.c throttles all writes except those that have both REQ_SYNC + * and REQ_IDLE set... 
+ */ + wbio = container_of(bio_alloc_bioset(NULL, buf_pages(data, sectors_to_write << 9), - REQ_OP_WRITE|REQ_META, + REQ_OP_WRITE|REQ_META|REQ_SYNC|REQ_IDLE, GFP_NOFS, &c->btree_bio), struct btree_write_bio, wbio.bio); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 7463946898c0..cc771affa511 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -903,7 +903,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, k = bch2_btree_and_journal_iter_peek(&jiter); if (!k.k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "node not found at pos "); bch2_bpos_to_text(&buf, path->pos); @@ -911,7 +911,6 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, bch2_btree_pos_to_text(&buf, c, l->b); ret = bch2_fs_topology_error(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } @@ -930,7 +929,7 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "node not found at pos "); bch2_bpos_to_text(&buf, path->pos); @@ -1451,7 +1450,7 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) { #ifdef CONFIG_BCACHEFS_DEBUG - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_prt_backtrace(&buf, &trans->last_restarted_trace); panic("in transaction restart: %s, last restarted by\n%s", bch2_err_str(trans->restarted), @@ -1601,13 +1600,13 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) static noinline __cold void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); + bch2_log_msg_start(trans->c, &buf); __bch2_trans_paths_to_text(&buf, trans, nosort); bch2_trans_updates_to_text(&buf, 
trans); bch2_print_str(trans->c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } noinline __cold @@ -1620,22 +1619,19 @@ noinline __cold static void bch2_trans_update_max_paths(struct btree_trans *trans) { struct btree_transaction_stats *s = btree_trans_stats(trans); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths); bch2_trans_paths_to_text(&buf, trans); if (!buf.allocation_failure) { - mutex_lock(&s->lock); + guard(mutex)(&s->lock); if (nr > s->nr_max_paths) { s->nr_max_paths = nr; swap(s->max_paths_text, buf.buf); } - mutex_unlock(&s->lock); } - printbuf_exit(&buf); - trans->nr_paths_max = nr; } @@ -1643,11 +1639,10 @@ noinline __cold int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) { if (trace_trans_restart_too_many_iters_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_trans_paths_to_text(&buf, trans); trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf); - printbuf_exit(&buf); } count_event(trans->c, trans_restart_too_many_iters); @@ -3196,14 +3191,13 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", BTREE_TRANS_MEM_MAX); bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); #endif } @@ -3213,7 +3207,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long struct btree_transaction_stats *s = btree_trans_stats(trans); if (new_bytes > s->max_mem) { - mutex_lock(&s->lock); + guard(mutex)(&s->lock); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr); s->trans_kmalloc_trace.nr = 
min(s->trans_kmalloc_trace.size, @@ -3225,7 +3219,6 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long s->trans_kmalloc_trace.nr); #endif s->max_mem = new_bytes; - mutex_unlock(&s->lock); } if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { @@ -3535,7 +3528,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans) struct btree_path *path; unsigned i; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn); @@ -3547,7 +3540,6 @@ static void check_btree_paths_leaked(struct btree_trans *trans) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } } #else @@ -3672,11 +3664,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) /* trans->paths is rcu protected vs. freeing */ guard(rcu)(); - out->atomic++; + guard(printbuf_atomic)(out); struct btree_path *paths = rcu_dereference(trans->paths); if (!paths) - goto out; + return; unsigned long *paths_allocated = trans_paths_allocated(paths); @@ -3712,8 +3704,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) bch2_btree_bkey_cached_common_to_text(out, b); prt_newline(out); } -out: - --out->atomic; } void bch2_fs_btree_iter_exit(struct bch_fs *c) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index cc2c6bb6b6a8..53074ed62e09 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -1007,13 +1007,19 @@ static inline void class_btree_trans_destructor(struct btree_trans **p) #define class_btree_trans_constructor(_c) bch2_trans_get(_c) +/* deprecated, prefer CLASS(btree_trans) */ #define bch2_trans_run(_c, _do) \ ({ \ CLASS(btree_trans, trans)(_c); \ (_do); \ }) -#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) +/* deprecated, prefer CLASS(btree_trans) */ +#define bch2_trans_do(_c, _do) \ +({ \ + 
CLASS(btree_trans, trans)(_c); \ + lockrestart_do(trans, _do); \ +}) void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index ea839560a136..24f2fbe84ad7 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -5,6 +5,7 @@ #include "bset.h" #include "btree_cache.h" #include "btree_journal_iter.h" +#include "disk_accounting.h" #include "journal_io.h" #include <linux/sort.h> @@ -278,12 +279,23 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (idx < keys->size && journal_key_cmp(&n, &keys->data[idx]) == 0) { + struct bkey_i *o = keys->data[idx].k; + + if (k->k.type == KEY_TYPE_accounting && + o->k.type == KEY_TYPE_accounting) { + if (!keys->data[idx].allocated) + goto insert; + + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(o)); + } + if (keys->data[idx].allocated) kfree(keys->data[idx].k); keys->data[idx] = n; return 0; } - +insert: if (idx > keys->gap) idx -= keys->size - keys->nr; @@ -450,9 +462,8 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, keys->data[idx].level == level && bpos_eq(keys->data[idx].k->k.p, pos) && !keys->data[idx].overwritten) { - mutex_lock(&keys->overwrite_lock); + guard(mutex)(&keys->overwrite_lock); __bch2_journal_key_overwritten(keys, idx); - mutex_unlock(&keys->overwrite_lock); } } @@ -803,7 +814,7 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, void bch2_journal_keys_dump(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); pr_info("%zu keys:", keys->nr); @@ -817,7 +828,6 @@ void bch2_journal_keys_dump(struct bch_fs *c) bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); pr_err("%s", buf.buf); } - printbuf_exit(&buf); } void bch2_fs_journal_keys_init(struct bch_fs *c) diff --git a/fs/bcachefs/btree_key_cache.c 
b/fs/bcachefs/btree_key_cache.c index 19d1bb806395..ebba14da92b4 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -301,13 +301,12 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans struct btree_path *ck_path, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, ck_path->pos); prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, trans->c, k); trace_key_cache_fill(trans, buf.buf); - printbuf_exit(&buf); } static noinline int btree_key_cache_fill(struct btree_trans *trans, @@ -540,10 +539,10 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; - struct btree_trans *trans = bch2_trans_get(c); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; + CLASS(btree_trans, trans)(c); btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); key = ck->key; @@ -566,8 +565,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); - - bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index bed2b4b6ffb9..38c5643e8a78 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -159,13 +159,11 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans count_event(c, trans_restart_would_deadlock); if (trace_trans_restart_would_deadlock_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); - buf.atomic++; print_cycle(&buf, g); - trace_trans_restart_would_deadlock(trans, buf.buf); - printbuf_exit(&buf); } } @@ -196,8 +194,8 @@ static int btree_trans_abort_preference(struct btree_trans *trans) static noinline __noreturn void break_cycle_fail(struct lock_graph *g) { - struct printbuf buf = 
PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks")); @@ -214,7 +212,6 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) } bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); - printbuf_exit(&buf); BUG(); } @@ -692,7 +689,7 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans, count_event(trans->c, trans_restart_upgrade); if (trace_trans_restart_upgrade_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "%s %pS\n", trans->fn, (void *) _RET_IP_); prt_printf(&buf, "btree %s pos\n", bch2_btree_id_str(path->btree_id)); @@ -708,7 +705,6 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans, path->l[f.l].lock_seq); trace_trans_restart_upgrade(trans->c, buf.buf); - printbuf_exit(&buf); } out: bch2_trans_verify_locks(trans); @@ -777,7 +773,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st goto out; if (trace_trans_restart_relock_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, path->pos); prt_printf(&buf, " %s l=%u seq=%u node seq=", @@ -797,7 +793,6 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st } trace_trans_restart_relock(trans, ip, buf.buf); - printbuf_exit(&buf); } count_event(trans->c, trans_restart_relock); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index cc7af8fe689e..d997e3818c30 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -65,16 +65,6 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); } -static inline u64 bkey_journal_seq(struct bkey_s_c k) -{ - switch (k.k->type) { - case KEY_TYPE_inode_v3: - return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq); - default: - return 0; - } -} - static int 
found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; @@ -196,25 +186,25 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, CLASS(printbuf, buf)(); if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { + /* read_done will swap out b->data for another buffer */ + bn = b->data; /* * Grab journal_seq here because we want the max journal_seq of * any bset; read_done sorts down to a single set and picks the * max journal_seq */ - n.journal_seq = le64_to_cpu(b->data->keys.journal_seq), + n.journal_seq = le64_to_cpu(bn->keys.journal_seq), n.sectors_written = b->written; - mutex_lock(&f->lock); + guard(mutex)(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); f->ret = -EINVAL; - goto unlock; + return; } if (darray_push(&f->nodes, n)) f->ret = -ENOMEM; -unlock: - mutex_unlock(&f->lock); } } @@ -224,15 +214,17 @@ static int read_btree_nodes_worker(void *p) struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); struct bch_dev *ca = w->ca; unsigned long last_print = jiffies; + struct btree *b = NULL; + struct bio *bio = NULL; - struct btree *b = __bch2_btree_node_mem_alloc(c); + b = __bch2_btree_node_mem_alloc(c); if (!b) { bch_err(c, "read_btree_nodes_worker: error allocating buf"); w->f->ret = -ENOMEM; goto err; } - struct bio *bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); + bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); if (!bio) { bch_err(c, "read_btree_nodes_worker: error allocating bio"); w->f->ret = -ENOMEM; @@ -367,7 +359,7 @@ static int handle_overwrites(struct bch_fs *c, int bch2_scan_for_btree_nodes(struct bch_fs *c) { struct find_btree_nodes *f = &c->found_btree_nodes; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); found_btree_nodes nodes_heap = {}; size_t dst; int ret = 0; @@ -474,7 +466,6 @@ int 
bch2_scan_for_btree_nodes(struct bch_fs *c) eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); err: darray_exit(&nodes_heap); - printbuf_exit(&buf); return ret; } @@ -546,7 +537,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, return ret; if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "recovery "); bch2_btree_id_level_to_text(&buf, btree, level); @@ -556,7 +547,6 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch2_bpos_to_text(&buf, node_max); bch_info(c, "%s(): %s", __func__, buf.buf); - printbuf_exit(&buf); } struct found_btree_node search = { @@ -580,10 +570,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, found_btree_node_to_key(&tmp.k, &n); if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); } BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 7fcf248a9a76..58590ccc26bd 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -235,10 +235,10 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); - struct btree_trans *trans = bch2_trans_get(c); unsigned long old, new; unsigned idx = w - b->writes; + CLASS(btree_trans, trans)(c); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); old = READ_ONCE(b->flags); @@ -257,8 +257,6 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, btree_node_write_if_need(trans, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); - - 
bch2_trans_put(trans); return 0; } @@ -591,7 +589,8 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) } static inline int -bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +bch2_trans_commit_write_locked(struct btree_trans *trans, + enum bch_trans_commit_flags flags, struct btree_insert_entry **stopped_at, unsigned long trace_ip) { @@ -673,16 +672,20 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct bkey_i *accounting; - percpu_down_read(&c->mark_lock); - for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); - accounting != btree_trans_subbuf_top(trans, &trans->accounting); - accounting = bkey_next(accounting)) { - ret = bch2_accounting_trans_commit_hook(trans, - bkey_i_to_accounting(accounting), flags); - if (ret) - goto revert_fs_usage; - } - percpu_up_read(&c->mark_lock); + scoped_guard(percpu_read, &c->mark_lock) + for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); + accounting != btree_trans_subbuf_top(trans, &trans->accounting); + accounting = bkey_next(accounting)) { + ret = bch2_accounting_trans_commit_hook(trans, + bkey_i_to_accounting(accounting), flags); + if (unlikely(ret)) { + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != accounting; + i = bkey_next(i)) + bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); + return ret; + } + } /* XXX: we only want to run this if deltas are nonzero */ bch2_trans_account_disk_usage_change(trans); @@ -769,12 +772,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, trans->journal_res.offset += trans->journal_entries.u64s; trans->journal_res.u64s -= trans->journal_entries.u64s; - memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, - BCH_JSET_ENTRY_write_buffer_keys, - BTREE_ID_accounting, 0, - trans->accounting.u64s)->_data, - btree_trans_subbuf_base(trans, &trans->accounting), - 
trans->accounting.u64s); + if (trans->accounting.u64s) + memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, + BCH_JSET_ENTRY_write_buffer_keys, + BTREE_ID_accounting, 0, + trans->accounting.u64s)->_data, + btree_trans_subbuf_base(trans, &trans->accounting), + trans->accounting.u64s); if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; @@ -794,13 +798,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, return 0; fatal_err: bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); - percpu_down_read(&c->mark_lock); -revert_fs_usage: - for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); - i != accounting; - i = bkey_next(i)) - bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); - percpu_up_read(&c->mark_lock); return ret; } @@ -826,7 +823,8 @@ static int bch2_trans_commit_journal_pin_flush(struct journal *j, /* * Get journal reservation, take write locks, and attempt to do btree update(s): */ -static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags, +static inline int do_bch2_trans_commit(struct btree_trans *trans, + enum bch_trans_commit_flags flags, struct btree_insert_entry **stopped_at, unsigned long trace_ip) { @@ -962,16 +960,33 @@ out: * do. */ static noinline int -do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) +do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans, + enum bch_trans_commit_flags flags) { struct bch_fs *c = trans->c; + int ret = 0; BUG_ON(current != c->recovery_task); + struct bkey_i *accounting; + + percpu_down_read(&c->mark_lock); + for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); + accounting != btree_trans_subbuf_top(trans, &trans->accounting); + accounting = bkey_next(accounting)) { + ret = likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply)) + ? 
bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(accounting), + BCH_ACCOUNTING_normal, false) + : 0; + if (ret) + goto revert_fs_usage; + } + percpu_up_read(&c->mark_lock); + trans_for_each_update(trans, i) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); + ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); if (ret) - return ret; + goto fatal_err; } for (struct jset_entry *i = btree_trans_journal_entries_start(trans); @@ -980,9 +995,9 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) if (i->type == BCH_JSET_ENTRY_btree_keys || i->type == BCH_JSET_ENTRY_write_buffer_keys) { jset_entry_for_each_key(i, k) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); + ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); if (ret) - return ret; + goto fatal_err; } } @@ -1000,15 +1015,27 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); i != btree_trans_subbuf_top(trans, &trans->accounting); i = bkey_next(i)) { - int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); + ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); if (ret) - return ret; + goto fatal_err; } return 0; +fatal_err: + bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); + percpu_down_read(&c->mark_lock); +revert_fs_usage: + BUG(); + /* error path not handled by __bch2_trans_commit() */ + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != accounting; + i = bkey_next(i)) + bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); + percpu_up_read(&c->mark_lock); + return ret; } -int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; @@ -1031,7 +1058,7 @@ int 
__bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) { if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) - ret = do_bch2_trans_commit_to_journal_replay(trans); + ret = do_bch2_trans_commit_to_journal_replay(trans, flags); else ret = bch_err_throw(c, erofs_trans_commit); goto out_reset; @@ -1039,11 +1066,15 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - journal_u64s = jset_u64s(trans->accounting.u64s); + journal_u64s = 0; + trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); + if (trans->accounting.u64s) + journal_u64s += jset_u64s(trans->accounting.u64s); + trans_for_each_update(trans, i) { struct btree_path *path = trans->paths + i->path; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 5d9e02370aff..f514a8ad7a89 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -661,21 +661,23 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, * @k: key to insert * @disk_res: must be non-NULL whenever inserting or potentially * splitting data extents - * @flags: transaction commit flags + * @commit_flags: transaction commit flags * @iter_flags: btree iter update trigger flags * * Returns: 0 on success, error code on failure */ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, - struct disk_reservation *disk_res, int flags, + struct disk_reservation *disk_res, + enum bch_trans_commit_flags commit_flags, enum btree_iter_update_trigger_flags iter_flags) { - return bch2_trans_commit_do(c, disk_res, NULL, flags, - bch2_btree_insert_trans(trans, id, k, iter_flags)); + CLASS(btree_trans, trans)(c); + return commit_do(trans, disk_res, NULL, commit_flags, + 
bch2_btree_insert_trans(trans, id, k, iter_flags)); } -int bch2_btree_delete_at(struct btree_trans *trans, - struct btree_iter *iter, unsigned update_flags) +int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, + enum btree_iter_update_trigger_flags flags) { struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); int ret = PTR_ERR_OR_ZERO(k); @@ -684,12 +686,12 @@ int bch2_btree_delete_at(struct btree_trans *trans, bkey_init(&k->k); k->k.p = iter->pos; - return bch2_trans_update(trans, iter, k, update_flags); + return bch2_trans_update(trans, iter, k, flags); } int bch2_btree_delete(struct btree_trans *trans, enum btree_id btree, struct bpos pos, - unsigned update_flags) + enum btree_iter_update_trigger_flags flags) { struct btree_iter iter; int ret; @@ -698,7 +700,7 @@ int bch2_btree_delete(struct btree_trans *trans, BTREE_ITER_cached| BTREE_ITER_intent); ret = bch2_btree_iter_traverse(trans, &iter) ?: - bch2_btree_delete_at(trans, &iter, update_flags); + bch2_btree_delete_at(trans, &iter, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -706,7 +708,7 @@ int bch2_btree_delete(struct btree_trans *trans, int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bpos start, struct bpos end, - unsigned update_flags, + enum btree_iter_update_trigger_flags flags, u64 *journal_seq) { u32 restart_count = trans->restart_count; @@ -714,7 +716,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bkey_s_c k; int ret = 0; - bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent|flags); while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) { struct disk_reservation disk_res = bch2_disk_reservation_init(trans->c, 0); @@ -747,7 +749,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, bpos_min(end, k.k->p).offset - iter.pos.offset); - ret = bch2_trans_update(trans, 
&iter, &delete, update_flags) ?: + ret = bch2_trans_update(trans, &iter, &delete, flags) ?: bch2_trans_commit(trans, &disk_res, journal_seq, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(trans->c, &disk_res); @@ -777,12 +779,11 @@ err: */ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, struct bpos start, struct bpos end, - unsigned update_flags, + enum btree_iter_update_trigger_flags flags, u64 *journal_seq) { - int ret = bch2_trans_run(c, - bch2_btree_delete_range_trans(trans, id, start, end, - update_flags, journal_seq)); + CLASS(btree_trans, trans)(c); + int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags, journal_seq); if (ret == -BCH_ERR_transaction_restart_nested) ret = 0; return ret; @@ -876,31 +877,31 @@ static int __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, va_list args) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_vprintf(&buf, fmt, args); unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); int ret = buf.allocation_failure ? 
-BCH_ERR_ENOMEM_trans_log_msg : 0; if (ret) - goto err; + return ret; if (!test_bit(JOURNAL_running, &c->journal.flags)) { ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); if (ret) - goto err; + return ret; struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0); c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { - ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, - bch2_trans_log_msg(trans, &buf)); + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, commit_flags, + bch2_trans_log_msg(trans, &buf)); } -err: - printbuf_exit(&buf); - return ret; + + return 0; } __printf(2, 3) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 2c6f9b44d888..633de3b3ac28 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -47,22 +47,27 @@ enum bch_trans_commit_flags { void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); -int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); +int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, + enum btree_iter_update_trigger_flags); +int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, + enum btree_iter_update_trigger_flags); int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_iter_update_trigger_flags); int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_iter_update_trigger_flags); -int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct - disk_reservation *, int flags, enum - btree_iter_update_trigger_flags iter_flags); +int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, + struct 
disk_reservation *, + enum bch_trans_commit_flags, + enum btree_iter_update_trigger_flags); int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); + struct bpos, struct bpos, + enum btree_iter_update_trigger_flags, u64 *); int bch2_btree_delete_range(struct bch_fs *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); + struct bpos, struct bpos, + enum btree_iter_update_trigger_flags, u64 *); int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); @@ -186,19 +191,29 @@ int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bk int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); +static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree, + struct bkey_i *k) +{ + if (unlikely(!btree_type_uses_write_buffer(btree) || + k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX)) { + int ret = bch2_btree_write_buffer_insert_err(c, btree, k); + dump_stack(); + return ret; + } + + return 0; +} + static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { kmsan_check_memory(k, bkey_bytes(&k->k)); - EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); - - if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); - dump_stack(); + int ret = bch2_btree_write_buffer_insert_checks(trans->c, btree, k); + if (unlikely(ret)) return ret; - } + /* * Most updates skip the btree write buffer until journal replay is * finished because synchronization with journal replay relies on having @@ -215,7 +230,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr return bch2_btree_insert_clone_trans(trans, btree, k); struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, 
jset_u64s(k->k.u64s)); - int ret = PTR_ERR_OR_ZERO(e); + ret = PTR_ERR_OR_ZERO(e); if (ret) return ret; @@ -226,7 +241,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); -int __bch2_trans_commit(struct btree_trans *, unsigned); +int __bch2_trans_commit(struct btree_trans *, enum bch_trans_commit_flags); int bch2_trans_log_str(struct btree_trans *, const char *); int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); @@ -263,6 +278,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) +/* deprecated, prefer CLASS(btree_trans) */ #define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 8e3d3db2c53b..312ef203b27b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -53,7 +53,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : b->data->min_key; struct btree_and_journal_iter iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bkey_buf prev; int ret = 0; @@ -133,7 +133,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) out: bch2_btree_and_journal_iter_exit(&iter); bch2_bkey_buf_exit(&prev, c); - printbuf_exit(&buf); return ret; err: bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); @@ -240,9 +239,8 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, __btree_node_free(trans, b); - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + 
bch2_btree_node_hash_remove(&c->btree_cache, b); six_unlock_write(&b->c.lock); mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); @@ -268,9 +266,8 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, clear_btree_node_dirty_acct(c, b); clear_btree_node_need_write(b); - mutex_lock(&c->btree_cache.lock); - __bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + __bch2_btree_node_hash_remove(&c->btree_cache, b); BUG_ON(p->nr >= ARRAY_SIZE(p->b)); p->b[p->nr++] = b; @@ -285,7 +282,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct closure *cl, bool interior_node, unsigned target, - unsigned flags) + enum bch_trans_commit_flags flags) { struct bch_fs *c = trans->c; struct write_point *wp; @@ -305,13 +302,18 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { - struct btree_alloc *a = - &c->btree_reserve_cache[--c->btree_reserve_cache_nr]; - - bkey_copy(&b->key, &a->k); - b->ob = a->ob; - mutex_unlock(&c->btree_reserve_cache_lock); - goto out; + for (struct btree_alloc *a = c->btree_reserve_cache; + a < c->btree_reserve_cache + c->btree_reserve_cache_nr; + a++) { + if (target && !bch2_bkey_in_target(c, bkey_i_to_s_c(&a->k), target)) + continue; + + bkey_copy(&b->key, &a->k); + b->ob = a->ob; + *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; + mutex_unlock(&c->btree_reserve_cache_lock); + goto out; + } } mutex_unlock(&c->btree_reserve_cache_lock); retry: @@ -555,7 +557,8 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], as->start_time); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); + list_del(&as->unwritten_list); list_del(&as->list); @@ -567,8 
+570,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * * since being on btree_interior_update_list is our ref on @c: */ closure_wake_up(&c->btree_interior_update_wait); - - mutex_unlock(&c->btree_interior_update_lock); } static void btree_update_add_key(struct btree_update *as, @@ -597,12 +598,11 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as) { struct bch_fs *c = as->c; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); for_each_keylist_key(&as->new_keys, k) bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -654,7 +654,7 @@ static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; struct btree *b; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); u64 journal_seq = 0; unsigned i; int ret; @@ -813,15 +813,15 @@ err: bch2_journal_pin_drop(&c->journal, &as->journal); - mutex_lock(&c->btree_interior_update_lock); - for (i = 0; i < as->nr_new_nodes; i++) { - b = as->new_nodes[i]; + scoped_guard(mutex, &c->btree_interior_update_lock) { + for (i = 0; i < as->nr_new_nodes; i++) { + b = as->new_nodes[i]; - BUG_ON(b->will_make_reachable != (unsigned long) as); - b->will_make_reachable = 0; - clear_btree_node_will_make_reachable(b); + BUG_ON(b->will_make_reachable != (unsigned long) as); + b->will_make_reachable = 0; + clear_btree_node_will_make_reachable(b); + } } - mutex_unlock(&c->btree_interior_update_lock); for (i = 0; i < as->nr_new_nodes; i++) { b = as->new_nodes[i]; @@ -835,7 +835,6 @@ err: bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); bch2_btree_update_free(as, trans); - bch2_trans_put(trans); } static void btree_interior_update_work(struct work_struct *work) @@ -845,12 +844,12 @@ static void btree_interior_update_work(struct work_struct *work) struct btree_update *as; while (1) { - mutex_lock(&c->btree_interior_update_lock); - as = 
list_first_entry_or_null(&c->btree_interior_updates_unwritten, - struct btree_update, unwritten_list); - if (as && !as->nodes_written) - as = NULL; - mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) { + as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, + struct btree_update, unwritten_list); + if (as && !as->nodes_written) + as = NULL; + } if (!as) break; @@ -864,9 +863,8 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written) closure_type(as, struct btree_update, cl); struct bch_fs *c = as->c; - mutex_lock(&c->btree_interior_update_lock); - as->nodes_written = true; - mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) + as->nodes_written = true; queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); } @@ -884,7 +882,7 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) BUG_ON(!btree_node_dirty(b)); BUG_ON(!b->c.level); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); as->mode = BTREE_UPDATE_node; @@ -893,8 +891,6 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); - - mutex_unlock(&c->btree_interior_update_lock); } static int bch2_update_reparent_journal_pin_flush(struct journal *j, @@ -933,11 +929,11 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) b->c.btree_id, b->c.level, insert, insert->k.u64s); - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); + scoped_guard(mutex, &c->btree_interior_update_lock) { + list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - as->mode = BTREE_UPDATE_root; - mutex_unlock(&c->btree_interior_update_lock); 
+ as->mode = BTREE_UPDATE_root; + } } /* @@ -958,7 +954,8 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree closure_get(&as->cl); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); + BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); BUG_ON(b->will_make_reachable); @@ -966,8 +963,6 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree b->will_make_reachable = 1UL|(unsigned long) as; set_btree_node_will_make_reachable(b); - mutex_unlock(&c->btree_interior_update_lock); - btree_update_add_key(as, &as->new_keys, b); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { @@ -986,31 +981,29 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) { struct btree_update *as; unsigned long v; - unsigned i; - mutex_lock(&c->btree_interior_update_lock); - /* - * When b->will_make_reachable != 0, it owns a ref on as->cl that's - * dropped when it gets written by bch2_btree_complete_write - the - * xchg() is for synchronization with bch2_btree_complete_write: - */ - v = xchg(&b->will_make_reachable, 0); - clear_btree_node_will_make_reachable(b); - as = (struct btree_update *) (v & ~1UL); + scoped_guard(mutex, &c->btree_interior_update_lock) { + /* + * When b->will_make_reachable != 0, it owns a ref on as->cl that's + * dropped when it gets written by bch2_btree_complete_write - the + * xchg() is for synchronization with bch2_btree_complete_write: + */ + v = xchg(&b->will_make_reachable, 0); + clear_btree_node_will_make_reachable(b); + as = (struct btree_update *) (v & ~1UL); - if (!as) { - mutex_unlock(&c->btree_interior_update_lock); - return; - } + if (!as) + return; - for (i = 0; i < as->nr_new_nodes; i++) - if (as->new_nodes[i] == b) - goto found; + unsigned i; + for (i = 0; i < as->nr_new_nodes; i++) + if (as->new_nodes[i] == b) + goto found; - BUG(); -found: - array_remove_item(as->new_nodes, as->nr_new_nodes, i); - 
mutex_unlock(&c->btree_interior_update_lock); + BUG(); + found: + array_remove_item(as->new_nodes, as->nr_new_nodes, i); + } if (v & 1) closure_put(&as->cl); @@ -1139,7 +1132,8 @@ static const char * const btree_node_reawrite_reason_strs[] = { static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned level_start, bool split, - unsigned target, unsigned flags) + unsigned target, + enum bch_trans_commit_flags flags) { struct bch_fs *c = trans->c; struct btree_update *as; @@ -1226,9 +1220,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, bch2_keylist_init(&as->new_keys, as->_new_keys); bch2_keylist_init(&as->parent_keys, as->inline_keys); - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->list, &c->btree_interior_update_list); - mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) + list_add_tail(&as->list, &c->btree_interior_update_list); struct btree *b = btree_path_node(path, path->level); as->node_start = b->data->min_key; @@ -1312,13 +1305,11 @@ err: static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) { /* Root nodes cannot be reaped */ - mutex_lock(&c->btree_cache.lock); - list_del_init(&b->list); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + list_del_init(&b->list); - mutex_lock(&c->btree_root_lock); - bch2_btree_id_root(c, b->c.btree_id)->b = b; - mutex_unlock(&c->btree_root_lock); + scoped_guard(mutex, &c->btree_root_lock) + bch2_btree_id_root(c, b->c.btree_id)->b = b; bch2_recalc_btree_reserve(c); } @@ -1373,7 +1364,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, { struct bch_fs *c = as->c; struct bkey_packed *k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned long old, new; BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && @@ -1418,8 +1409,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, 
new |= BTREE_WRITE_interior; new |= 1 << BTREE_NODE_need_write; } while (!try_cmpxchg(&b->flags, &old, new)); - - printbuf_exit(&buf); } static int @@ -1446,7 +1435,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, int ret = bch2_btree_node_check_topology(trans, b); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); for (struct bkey_i *k = keys->keys; k != insert; @@ -1833,7 +1822,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_verify_keylist_sorted(keys); if (!btree_node_intent_locked(path, b->c.level)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "%s(): node not locked at level %u\n", __func__, b->c.level); @@ -1842,7 +1831,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return -EIO; } @@ -1965,9 +1953,8 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * bch2_trans_node_add(trans, path, n); six_unlock_intent(&n->c.lock); - mutex_lock(&c->btree_cache.lock); - list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); bch2_trans_verify_locks(trans); } @@ -2067,7 +2054,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); printbuf_indent_add_nextline(&buf, 2); prt_printf(&buf, "%s(): ", __func__); @@ -2082,7 +2069,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_bpos_to_text(&buf, next->data->min_key); bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } @@ -2222,7 +2208,7 @@ int bch2_btree_node_rewrite(struct 
btree_trans *trans, struct btree_iter *iter, struct btree *b, unsigned target, - unsigned flags) + enum bch_trans_commit_flags flags) { struct bch_fs *c = trans->c; struct btree *n, *parent; @@ -2287,7 +2273,8 @@ err: int bch2_btree_node_rewrite_key(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bkey_i *k, unsigned flags) + struct bkey_i *k, + enum bch_trans_commit_flags flags) { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, @@ -2311,7 +2298,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bpos pos, unsigned target, - unsigned flags) + enum bch_trans_commit_flags flags) { BUG_ON(!level); @@ -2330,7 +2317,8 @@ err: } int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, - struct btree *b, unsigned flags) + struct btree *b, + enum bch_trans_commit_flags flags) { struct btree_iter iter; int ret = get_iter_to_node(trans, &iter, b); @@ -2363,9 +2351,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); - spin_lock(&c->btree_node_rewrites_lock); - list_del(&a->list); - spin_unlock(&c->btree_node_rewrites_lock); + scoped_guard(spinlock, &c->btree_node_rewrites_lock) + list_del(&a->list); closure_wake_up(&c->btree_node_rewrites_wait); @@ -2390,16 +2377,16 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) bool now = false, pending = false; - spin_lock(&c->btree_node_rewrites_lock); - if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && - enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { - list_add(&a->list, &c->btree_node_rewrites); - now = true; - } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { - list_add(&a->list, &c->btree_node_rewrites_pending); - pending = true; + scoped_guard(spinlock, &c->btree_node_rewrites_lock) { + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && + 
enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { + list_add(&a->list, &c->btree_node_rewrites); + now = true; + } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { + list_add(&a->list, &c->btree_node_rewrites_pending); + pending = true; + } } - spin_unlock(&c->btree_node_rewrites_lock); if (now) { queue_work(c->btree_node_rewrite_worker, &a->work); @@ -2420,13 +2407,14 @@ void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) void bch2_do_pending_node_rewrites(struct bch_fs *c) { while (1) { - spin_lock(&c->btree_node_rewrites_lock); - struct async_btree_rewrite *a = - list_pop_entry(&c->btree_node_rewrites_pending, - struct async_btree_rewrite, list); - if (a) - list_add(&a->list, &c->btree_node_rewrites); - spin_unlock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a; + + scoped_guard(spinlock, &c->btree_node_rewrites_lock) { + a = list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + if (a) + list_add(&a->list, &c->btree_node_rewrites); + } if (!a) break; @@ -2439,11 +2427,11 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) void bch2_free_pending_node_rewrites(struct bch_fs *c) { while (1) { - spin_lock(&c->btree_node_rewrites_lock); - struct async_btree_rewrite *a = - list_pop_entry(&c->btree_node_rewrites_pending, - struct async_btree_rewrite, list); - spin_unlock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a; + + scoped_guard(spinlock, &c->btree_node_rewrites_lock) + a = list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); if (!a) break; @@ -2525,7 +2513,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); if (new_hash) { - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, new_hash); __bch2_btree_node_hash_remove(&c->btree_cache, b); @@ -2533,7 +2521,6 @@ static int 
__bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); BUG_ON(ret); - mutex_unlock(&c->btree_cache.lock); } else { bkey_copy(&b->key, new_key); } @@ -2544,9 +2531,8 @@ out: return ret; err: if (new_hash) { - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); } goto out; } @@ -2681,7 +2667,8 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) @@ -2714,21 +2701,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_update *as; - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) bch2_btree_update_to_text(out, as); - mutex_unlock(&c->btree_interior_update_lock); } static bool bch2_btree_interior_updates_pending(struct bch_fs *c) { - bool ret; - - mutex_lock(&c->btree_interior_update_lock); - ret = !list_empty(&c->btree_interior_update_list); - mutex_unlock(&c->btree_interior_update_lock); - - return ret; + guard(mutex)(&c->btree_interior_update_lock); + return !list_empty(&c->btree_interior_update_list); } bool bch2_btree_interior_updates_flush(struct bch_fs *c) @@ -2745,13 +2726,11 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry { struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - mutex_lock(&c->btree_root_lock); + guard(mutex)(&c->btree_interior_update_lock); r->level = entry->level; 
r->alive = true; bkey_copy(&r->key, (struct bkey_i *) entry->start); - - mutex_unlock(&c->btree_root_lock); } struct jset_entry * @@ -2759,11 +2738,9 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, struct jset_entry *end, unsigned long skip) { - unsigned i; + guard(mutex)(&c->btree_interior_update_lock); - mutex_lock(&c->btree_root_lock); - - for (i = 0; i < btree_id_nr_alive(c); i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->alive && !test_bit(i, &skip)) { @@ -2773,8 +2750,6 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, } } - mutex_unlock(&c->btree_root_lock); - return end; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index ac04e45a8515..6ed049f19a9a 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -175,15 +175,19 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, } int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, - struct btree *, unsigned, unsigned); + struct btree *, unsigned, + enum bch_trans_commit_flags); int bch2_btree_node_rewrite_key(struct btree_trans *, enum btree_id, unsigned, - struct bkey_i *, unsigned); + struct bkey_i *, + enum bch_trans_commit_flags); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, - struct bpos, unsigned, unsigned); + struct bpos, unsigned, + enum bch_trans_commit_flags); int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *, - struct btree *, unsigned); + struct btree *, + enum bch_trans_commit_flags); void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 4b095235a0d2..9cfc3edce39a 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -259,9 +259,8 @@ out: bch2_btree_write_buffer_journal_flush); if (j->watermark) { - 
spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_set_watermark(j); - spin_unlock(&j->lock); } BUG_ON(wb->sorted.size < wb->flushing.keys.nr); @@ -270,7 +269,7 @@ out: int bch2_btree_write_buffer_insert_err(struct bch_fs *c, enum btree_id btree, struct bkey_i *k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "attempting to do write buffer update on non wb btree="); bch2_btree_id_to_text(&buf, btree); @@ -278,7 +277,6 @@ int bch2_btree_write_buffer_insert_err(struct bch_fs *c, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); return -EROFS; } @@ -300,9 +298,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bch2_trans_unlock(trans); bch2_trans_begin(trans); - mutex_lock(&wb->inc.lock); - move_keys_from_inc_to_flushing(wb); - mutex_unlock(&wb->inc.lock); + scoped_guard(mutex, &wb->inc.lock) + move_keys_from_inc_to_flushing(wb); for (size_t i = 0; i < wb->flushing.keys.nr; i++) { wb->sorted.data[i].idx = i; @@ -330,10 +327,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - if (unlikely(!btree_type_uses_write_buffer(k->btree))) { - ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k); + ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k); + if (unlikely(ret)) goto err; - } for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) prefetch(&wb->flushing.keys.data[n->idx]); @@ -534,9 +530,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) ret = bch2_journal_keys_to_write_buffer(c, buf); if (!blocked && !ret) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); buf->need_flush_to_write_buffer = false; - spin_unlock(&j->lock); } mutex_unlock(&j->buf_lock); @@ -568,9 +563,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans 
*trans, u64 max_seq, * On memory allocation failure, bch2_btree_write_buffer_flush_locked() * is not guaranteed to empty wb->inc: */ - mutex_lock(&wb->flushing.lock); - ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&wb->flushing.lock); + scoped_guard(mutex, &wb->flushing.lock) + ret = bch2_btree_write_buffer_flush_locked(trans); } while (!ret && (fetch_from_journal_err || (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || @@ -583,9 +577,10 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); + CLASS(btree_trans, trans)(c); bool did_work = false; - return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work)); + return btree_write_buffer_flush_seq(trans, seq, &did_work); } int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) @@ -607,9 +602,9 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c) if (bch2_journal_error(&c->journal)) return false; + CLASS(btree_trans, trans)(c); bool did_work = false; - bch2_trans_run(c, btree_write_buffer_flush_seq(trans, - journal_cur_seq(&c->journal), &did_work)); + btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work); return did_work; } @@ -656,11 +651,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { if (trace_write_buffer_maybe_flush_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, referring_k); trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); - printbuf_exit(&buf); } bch2_bkey_buf_reassemble(&tmp, c, referring_k); @@ -691,11 +685,12 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work) struct btree_write_buffer *wb = &c->btree_write_buffer; int ret; - mutex_lock(&wb->flushing.lock); - do { - ret = bch2_trans_run(c, 
bch2_btree_write_buffer_flush_locked(trans)); - } while (!ret && bch2_btree_write_buffer_should_flush(c)); - mutex_unlock(&wb->flushing.lock); + scoped_guard(mutex, &wb->flushing.lock) { + CLASS(btree_trans, trans)(c); + do { + ret = bch2_btree_write_buffer_flush_locked(trans); + } while (!ret && bch2_btree_write_buffer_should_flush(c)); + } enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); } diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index c351d21aca0b..e484cd6b90b0 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -89,11 +89,9 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { - if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(c, btree, k); - dump_stack(); + int ret = bch2_btree_write_buffer_insert_checks(c, btree, k); + if (unlikely(ret)) return ret; - } EBUG_ON(!dst->seq); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index f25903c10e8a..5aab527e3e7c 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,13 +71,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c) struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *c) { - struct bch_fs_usage_short ret; - - percpu_down_read(&c->mark_lock); - ret = __bch2_fs_usage_read_short(c); - percpu_up_read(&c->mark_lock); - - return ret; + guard(percpu_read)(&c->mark_lock); + return __bch2_fs_usage_read_short(c); } void bch2_dev_usage_to_text(struct printbuf *out, @@ -113,10 +108,10 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bool *do_update) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); if (!ca) { if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, trans, ptr_to_invalid_device, @@ -138,7 
+133,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; - goto out; + return 0; } enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); @@ -158,7 +153,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, } else { /* this pointer will be dropped */ *do_update = true; - goto out; + return 0; } } @@ -208,7 +203,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, *do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) - goto out; + return 0; if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), trans, ptr_bucket_data_type_mismatch, @@ -224,14 +219,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, switch (g->data_type) { case BCH_DATA_sb: bch_err(c, "btree and superblock in the same bucket - cannot repair"); - ret = bch_err_throw(c, fsck_repair_unimplemented); - goto out; + return bch_err_throw(c, fsck_repair_unimplemented); case BCH_DATA_journal: ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr)); bch_err_msg(c, ret, "error deleting journal bucket %zu", PTR_BUCKET_NR(ca, &p.ptr)); if (ret) - goto out; + return ret; break; } @@ -265,10 +259,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; } -out: fsck_err: - bch2_dev_put(ca); - printbuf_exit(&buf); return ret; } @@ -281,7 +272,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, const union bch_extent_entry *entry_c; struct extent_ptr_decoded p = { 0 }; bool do_update = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; /* We don't yet do btree key updates correctly for when we're RW */ @@ -290,14 +281,14 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); if (ret) - goto err; + return ret; } if (do_update) 
{ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto err; + return ret; scoped_guard(rcu) bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev)); @@ -387,7 +378,7 @@ found: BTREE_TRIGGER_norun); bch2_trans_iter_exit(trans, &iter); if (ret) - goto err; + return ret; if (level) bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); @@ -396,7 +387,7 @@ found: jset_u64s(new->k.u64s)); ret = PTR_ERR_OR_ZERO(e); if (ret) - goto err; + return ret; journal_entry_set(e, BCH_JSET_ENTRY_btree_root, @@ -413,9 +404,8 @@ found: bkey_copy(&b->key, new); } } -err: - printbuf_exit(&buf); - return ret; + + return 0; } static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, @@ -460,9 +450,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, { struct bch_fs *c = trans->c; size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool inserting = sectors > 0; - int ret = 0; BUG_ON(!sectors); @@ -474,9 +463,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); } if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { @@ -487,15 +475,12 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_ptr_too_stale); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_too_stale); } - if (b_gen != ptr->gen && ptr->cached) { - ret = 1; - goto out; - } + if (b_gen != ptr->gen && ptr->cached) + return 1; if 
(unlikely(b_gen != ptr->gen)) { bch2_log_msg_start(c, &buf); @@ -506,9 +491,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_stale_dirty_ptr); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_stale_dirty_ptr); } if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { @@ -518,9 +502,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type), bch2_data_type_str(ptr_data_type)); - ret = bucket_ref_update_err(trans, &buf, k, inserting, + return bucket_ref_update_err(trans, &buf, k, inserting, BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); - goto out; } if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { @@ -531,16 +514,13 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), *bucket_sectors, sectors); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_bucket_sector_count_overflow); sectors = -*bucket_sectors; - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_bucket_sector_count_overflow); } *bucket_sectors += sectors; -out: - printbuf_exit(&buf); - return ret; + return 0; } void bch2_trans_account_disk_usage_change(struct btree_trans *trans) @@ -550,7 +530,7 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) static int warned_disk_usage = 0; bool warn = false; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); struct bch_fs_usage_base *src = &trans->fs_usage_delta; s64 added = src->btree + src->data + src->reserved; @@ -578,11 +558,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) this_cpu_sub(*c->online_reserved, added); } - preempt_disable(); - struct bch_fs_usage_base *dst = 
this_cpu_ptr(c->usage); - acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); - preempt_enable(); - percpu_up_read(&c->mark_lock); + scoped_guard(preempt) { + struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); + acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); + } if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) bch2_trans_inconsistent(trans, @@ -621,40 +600,34 @@ static int bch2_trigger_pointer(struct btree_trans *trans, { struct bch_fs *c = trans->c; bool insert = !(flags & BTREE_TRIGGER_overwrite); - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len; - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); if (unlikely(!ca)) { if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) - ret = bch_err_throw(c, trigger_pointer); - goto err; + return bch_err_throw(c, trigger_pointer); + return 0; } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); if (!bucket_valid(ca, bucket.offset)) { if (insert) { bch2_dev_bucket_missing(ca, bucket.offset); - ret = bch_err_throw(c, trigger_pointer); + return bch_err_throw(c, trigger_pointer); } - goto err; + return 0; } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: - __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); - if (ret) - goto err; - - ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); - if (ret) - goto err; + return PTR_ERR_OR_ZERO(a) ?: + __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert) ?: + bch2_bucket_backpointer_mod(trans, k, &bp, insert); } if (flags & BTREE_TRIGGER_gc) { @@ -662,23 +635,22 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on 
device %u\n %s", p.ptr.dev, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch_err_throw(c, trigger_pointer); - goto err; + return bch_err_throw(c, trigger_pointer); } bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; - ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); + int ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); alloc_to_bucket(g, new); bucket_unlock(g); - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + if (ret) + return ret; + + return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); } -err: - bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; + + return 0; } static int bch2_trigger_stripe_ptr(struct btree_trans *trans, @@ -738,14 +710,13 @@ err: if (!m || !m->alive) { gc_stripe_unlock(m); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); __bch2_inconsistent_error(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, trigger_stripe_pointer); } @@ -996,7 +967,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s\n", @@ -1012,7 +983,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, /* Always print, this is always fatal */ bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (!ret) ret = bch_err_throw(c, metadata_bucket_inconsistency); goto err; @@ -1034,7 +1004,6 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * enum btree_iter_update_trigger_flags 
flags) { struct bch_fs *c = trans->c; - int ret = 0; struct bucket *g = gc_bucket(ca, b); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", @@ -1062,8 +1031,7 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * g->dirty_sectors += sectors; struct bch_alloc_v4 new = bucket_m_to_alloc(*g); bucket_unlock(g); - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - return ret; + return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); err_unlock: bucket_unlock(g); err: @@ -1125,10 +1093,10 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; + struct bch_sb_layout layout; - mutex_lock(&c->sb_lock); - struct bch_sb_layout layout = ca->disk_sb.sb->layout; - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + layout = ca->disk_sb.sb->layout; u64 bucket = 0; unsigned i, bucket_sectors = 0; @@ -1173,8 +1141,8 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - int ret = bch2_trans_run(c, - __bch2_trans_mark_dev_sb(trans, ca, flags)); + CLASS(btree_trans, trans)(c); + int ret = __bch2_trans_mark_dev_sb(trans, ca, flags); bch_err_fn(c, ret); return ret; } @@ -1227,15 +1195,38 @@ bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) #define SECTORS_CACHE 1024 +static int disk_reservation_recalc_sectors_available(struct bch_fs *c, + struct disk_reservation *res, + u64 sectors, enum bch_reservation_flags flags) +{ + guard(mutex)(&c->sectors_available_lock); + + percpu_u64_set(&c->pcpu->sectors_available, 0); + u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); + + if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) + sectors = min(sectors, sectors_available); + + if 
(sectors <= sectors_available || + (flags & BCH_DISK_RESERVATION_NOFAIL)) { + atomic64_set(&c->sectors_available, + max_t(s64, 0, sectors_available - sectors)); + this_cpu_add(*c->online_reserved, sectors); + res->sectors += sectors; + return 0; + } else { + atomic64_set(&c->sectors_available, sectors_available); + return bch_err_throw(c, ENOSPC_disk_reservation); + } +} + int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, u64 sectors, enum bch_reservation_flags flags) { struct bch_fs_pcpu *pcpu; u64 old, get; - u64 sectors_available; - int ret; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); preempt_disable(); pcpu = this_cpu_ptr(c->pcpu); @@ -1246,9 +1237,10 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, do { get = min((u64) sectors + SECTORS_CACHE, old); - if (get < sectors) { + if (unlikely(get < sectors)) { preempt_enable(); - goto recalculate; + return disk_reservation_recalc_sectors_available(c, + res, sectors, flags); } } while (!atomic64_try_cmpxchg(&c->sectors_available, &old, old - get)); @@ -1259,36 +1251,8 @@ out: pcpu->sectors_available -= sectors; this_cpu_add(*c->online_reserved, sectors); res->sectors += sectors; - preempt_enable(); - percpu_up_read(&c->mark_lock); return 0; - -recalculate: - mutex_lock(&c->sectors_available_lock); - - percpu_u64_set(&c->pcpu->sectors_available, 0); - sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); - - if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) - sectors = min(sectors, sectors_available); - - if (sectors <= sectors_available || - (flags & BCH_DISK_RESERVATION_NOFAIL)) { - atomic64_set(&c->sectors_available, - max_t(s64, 0, sectors_available - sectors)); - this_cpu_add(*c->online_reserved, sectors); - res->sectors += sectors; - ret = 0; - } else { - atomic64_set(&c->sectors_available, sectors_available); - ret = bch_err_throw(c, ENOSPC_disk_reservation); - } - - 
mutex_unlock(&c->sectors_available_lock); - percpu_up_read(&c->mark_lock); - - return ret; } /* Startup/shutdown: */ diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index 832eff93acb6..ca341586920b 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -25,25 +25,20 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, unsigned dev, u64 bucket) { - struct buckets_waiting_for_journal_table *t; u64 dev_bucket = (u64) dev << 56 | bucket; - u64 ret = 0; - mutex_lock(&b->lock); - t = b->t; + guard(mutex)(&b->lock); + + struct buckets_waiting_for_journal_table *t = b->t; for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); - if (h->dev_bucket == dev_bucket) { - ret = h->journal_seq; - break; - } + if (h->dev_bucket == dev_bucket) + return h->journal_seq; } - mutex_unlock(&b->lock); - - return ret; + return 0; } static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t, @@ -92,12 +87,11 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .journal_seq = journal_seq, }; size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; - int ret = 0; - mutex_lock(&b->lock); + guard(mutex)(&b->lock); if (likely(bucket_table_insert(b->t, &new, flushed_seq))) - goto out; + return 0; t = b->t; size = 1UL << t->bits; @@ -109,8 +103,7 @@ realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { struct bch_fs *c = container_of(b, struct bch_fs, buckets_waiting_for_journal); - ret = bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); - goto out; + return bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); } retry_rehash: @@ -143,10 +136,7 @@ retry_rehash: pr_debug("took %zu rehashes, table at %zu/%lu 
elements", nr_rehashes, nr_elements, 1UL << b->t->bits); -out: - mutex_unlock(&b->lock); - - return ret; + return 0; } void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 5ea89aa2b0c4..467fc45e84fe 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -52,6 +52,11 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, return ca; } +DEFINE_CLASS(bch2_device_lookup, struct bch_dev *, + bch2_dev_put(_T), + bch2_device_lookup(c, dev, flags), + struct bch_fs *c, u64 dev, unsigned flags); + #if 0 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) { @@ -207,8 +212,6 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_dev *ca; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -219,7 +222,7 @@ static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + struct bch_dev *ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); @@ -249,9 +252,6 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -262,21 +262,16 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_offline(c, ca, arg.flags); - bch2_dev_put(ca); - return ret; + return bch2_dev_offline(c, ca, arg.flags); } static long bch2_ioctl_disk_set_state(struct bch_fs *c, struct bch_ioctl_disk_set_state arg) { - struct bch_dev *ca; - int ret; - if 
(!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -288,15 +283,12 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, arg.new_state >= BCH_MEMBER_STATE_NR) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); - if (ret) - bch_err(c, "Error setting device state: %s", bch2_err_str(ret)); - - bch2_dev_put(ca); + int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); + bch_err_msg(ca, ret, "setting device state"); return ret; } @@ -312,7 +304,7 @@ static int bch2_data_thread(void *arg) { struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); - ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); + ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, &ctx->arg); if (ctx->thr.ret == -BCH_ERR_device_offline) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline; else { @@ -349,14 +341,13 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, }; if (ctx->arg.op == BCH_DATA_OP_scrub) { - struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, ctx->arg.scrub.dev); if (ca) { struct bch_dev_usage_full u; bch2_dev_usage_full_read_fast(ca, &u); for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) if (ctx->arg.scrub.data_types & BIT(i)) e.p.sectors_total += u.d[i].sectors; - bch2_dev_put(ca); } } else { e.p.sectors_total = bch2_fs_usage_read_short(c).used; @@ -418,9 +409,8 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; - darray_char replicas = {}; + CLASS(darray_char, replicas)(); u32 replica_entries_bytes; - int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; @@ -428,11 +418,11 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, if (get_user(replica_entries_bytes, 
&user_arg->replica_entries_bytes)) return -EFAULT; - ret = bch2_fs_replicas_usage_read(c, &replicas) ?: + int ret = bch2_fs_replicas_usage_read(c, &replicas) ?: (replica_entries_bytes < replicas.nr ? -ERANGE : 0) ?: copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr); if (ret) - goto err; + return ret; struct bch_fs_usage_short u = bch2_fs_usage_read_short(c); arg.capacity = c->capacity; @@ -449,52 +439,41 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, &arg.persistent_reserved[i], 1); } - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); -err: - darray_exit(&replicas); - return ret; + return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } static long bch2_ioctl_query_accounting(struct bch_fs *c, struct bch_ioctl_query_accounting __user *user_arg) { struct bch_ioctl_query_accounting arg; - darray_char accounting = {}; - int ret = 0; + CLASS(darray_char, accounting)(); if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; - ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: + int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?: (arg.accounting_u64s * sizeof(u64) < accounting.nr ? 
-ERANGE : 0) ?: copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr); if (ret) - goto err; + return ret; arg.capacity = c->capacity; arg.used = bch2_fs_usage_read_short(c).used; arg.online_reserved = percpu_u64_get(c->online_reserved); arg.accounting_u64s = accounting.nr / sizeof(u64); - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); -err: - darray_exit(&accounting); - return ret; + return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } /* obsolete, didn't allow for new data types: */ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { - struct bch_ioctl_dev_usage arg; - struct bch_dev_usage_full src; - struct bch_dev *ca; - unsigned i; - if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; + struct bch_ioctl_dev_usage arg; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; @@ -504,38 +483,32 @@ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, arg.pad[2]) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_full_read(ca); + struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - for (i = 0; i < ARRAY_SIZE(arg.d); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(arg.d); i++) { arg.d[i].buckets = src.d[i].buckets; arg.d[i].sectors = src.d[i].sectors; arg.d[i].fragmented = src.d[i].fragmented; } - bch2_dev_put(ca); - return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_ioctl_dev_usage_v2 __user *user_arg) { - struct bch_ioctl_dev_usage_v2 arg; - struct bch_dev_usage_full src; - struct bch_dev *ca; - int ret = 0; - if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; + struct bch_ioctl_dev_usage_v2 arg; if 
(copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; @@ -545,20 +518,20 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, arg.pad[2]) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_full_read(ca); + struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR); arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); + int ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); if (ret) - goto err; + return ret; for (unsigned i = 0; i < arg.nr_data_types; i++) { struct bch_ioctl_dev_usage_type t = { @@ -569,11 +542,10 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t)); if (ret) - goto err; + return ret; } -err: - bch2_dev_put(ca); - return ret; + + return 0; } static long bch2_ioctl_read_super(struct bch_fs *c, @@ -590,13 +562,13 @@ static long bch2_ioctl_read_super(struct bch_fs *c, arg.pad) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (arg.flags & BCH_READ_DEV) { ca = bch2_device_lookup(c, arg.dev, arg.flags); ret = PTR_ERR_OR_ZERO(ca); if (ret) - goto err_unlock; + return ret; sb = ca->disk_sb.sb; } else { @@ -612,8 +584,6 @@ static long bch2_ioctl_read_super(struct bch_fs *c, vstruct_bytes(sb)); err: bch2_dev_put(ca); -err_unlock: - mutex_unlock(&c->sb_lock); return ret; } @@ -639,9 +609,6 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, static long bch2_ioctl_disk_resize(struct bch_fs *c, struct bch_ioctl_disk_resize arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -649,22 +616,16 @@ static long bch2_ioctl_disk_resize(struct bch_fs *c, arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, 
arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_resize(c, ca, arg.nbuckets); - - bch2_dev_put(ca); - return ret; + return bch2_dev_resize(c, ca, arg.nbuckets); } static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, struct bch_ioctl_disk_resize_journal arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -675,14 +636,11 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, if (arg.nbuckets > U32_MAX) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); - - bch2_dev_put(ca); - return ret; + return bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); } #define BCH_IOCTL(_name, _argtype) \ diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index a6795e73f0b9..b1ec38992852 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -361,7 +361,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, extent_nonce(version, crc_old), bio); if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" " expected %0llx:%0llx got %0llx:%0llx (old type ", __func__, @@ -374,7 +374,6 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, bch2_prt_csum_type(&buf, new_csum_type); prt_str(&buf, ")"); WARN_RATELIMIT(1, "%s", buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, recompute_checksum); } @@ -438,23 +437,21 @@ const struct bch_sb_field_ops bch_sb_field_ops_crypt = { #ifdef __KERNEL__ static int __bch2_request_key(char *key_description, struct bch_key *key) { - struct key *keyring_key; - const struct user_key_payload *ukp; int ret; - keyring_key = request_key(&key_type_user, key_description, NULL); + struct key *keyring_key 
= request_key(&key_type_user, key_description, NULL); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); - down_read(&keyring_key->sem); - ukp = dereference_key_locked(keyring_key); - if (ukp->datalen == sizeof(*key)) { - memcpy(key, ukp->data, ukp->datalen); - ret = 0; - } else { - ret = -EINVAL; + scoped_guard(rwsem_read, &keyring_key->sem) { + const struct user_key_payload *ukp = dereference_key_locked(keyring_key); + if (ukp->datalen == sizeof(*key)) { + memcpy(key, ukp->data, ukp->datalen); + ret = 0; + } else { + ret = -EINVAL; + } } - up_read(&keyring_key->sem); key_put(keyring_key); return ret; @@ -495,14 +492,13 @@ got_key: int bch2_request_key(struct bch_sb *sb, struct bch_key *key) { - struct printbuf key_description = PRINTBUF; + CLASS(printbuf, key_description)(); int ret; prt_printf(&key_description, "bcachefs:"); pr_uuid(&key_description, sb->user_uuid.b); ret = __bch2_request_key(key_description.buf, key); - printbuf_exit(&key_description); #ifndef __KERNEL__ if (ret) { @@ -524,13 +520,12 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) int bch2_revoke_key(struct bch_sb *sb) { key_serial_t key_id; - struct printbuf key_description = PRINTBUF; + CLASS(printbuf, key_description)(); prt_printf(&key_description, "bcachefs:"); pr_uuid(&key_description, sb->user_uuid.b); key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING); - printbuf_exit(&key_description); if (key_id < 0) return errno; @@ -584,34 +579,28 @@ err: */ int bch2_disable_encryption(struct bch_fs *c) { - struct bch_sb_field_crypt *crypt; - struct bch_key key; - int ret = -EINVAL; - - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); - crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); if (!crypt) - goto out; + return -EINVAL; /* is key encrypted? 
*/ ret = 0; if (bch2_key_is_encrypted(&crypt->key)) - goto out; + return 0; - ret = bch2_decrypt_sb_key(c, crypt, &key); + struct bch_key key; + int ret = bch2_decrypt_sb_key(c, crypt, &key); if (ret) - goto out; + return ret; crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC); crypt->key.key = key; SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); bch2_write_super(c); -out: - mutex_unlock(&c->sb_lock); - - return ret; + return 0; } /* @@ -625,7 +614,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) struct bch_sb_field_crypt *crypt; int ret = -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); /* Do we already have an encryption key? */ if (bch2_sb_field_get(c->disk_sb.sb, crypt)) @@ -669,7 +658,6 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); bch2_write_super(c); err: - mutex_unlock(&c->sb_lock); memzero_explicit(&user_key, sizeof(user_key)); memzero_explicit(&key, sizeof(key)); return ret; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 8e9264b5a84e..1c6d0cdca3c5 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -40,15 +40,13 @@ out: void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) { - spin_lock(&clock->timer_lock); + guard(spinlock)(&clock->timer_lock); for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) { min_heap_del(&clock->timers, i, &callbacks, NULL); - break; + return; } - - spin_unlock(&clock->timer_lock); } struct io_clock_wait { @@ -133,28 +131,27 @@ void __bch2_increment_clock(struct io_clock *clock, u64 sectors) struct io_timer *timer; u64 now = atomic64_add_return(sectors, &clock->now); - spin_lock(&clock->timer_lock); + guard(spinlock)(&clock->timer_lock); + while ((timer = get_expired_timer(clock, now))) timer->fn(timer); - spin_unlock(&clock->timer_lock); } void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) { - out->atomic++; - spin_lock(&clock->timer_lock); u64 now = 
atomic64_read(&clock->now); printbuf_tabstop_push(out, 40); prt_printf(out, "current time:\t%llu\n", now); + guard(printbuf_atomic)(out); + guard(spinlock)(&clock->timer_lock); + for (unsigned i = 0; i < clock->timers.nr; i++) prt_printf(out, "%ps %ps:\t%llu\n", clock->timers.data[i]->fn, clock->timers.data[i]->fn2, clock->timers.data[i]->expire); - spin_unlock(&clock->timer_lock); - --out->atomic; } void bch2_io_clock_exit(struct io_clock *clock) diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index b37b1f325f0a..aeb9b9bd7d33 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -336,7 +336,7 @@ static int attempt_compress(struct bch_fs *c, void *workspace, void *dst, size_t dst_len, void *src, size_t src_len, - struct bch_compression_opt compression) + union bch_compression_opt compression) { enum bch_compression_type compression_type = __bch2_compression_opt_to_type[compression.type]; @@ -426,7 +426,7 @@ static int attempt_compress(struct bch_fs *c, static unsigned __bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, struct bio *src, size_t *src_len, - struct bch_compression_opt compression) + union bch_compression_opt compression) { struct bbuf src_data = { NULL }, dst_data = { NULL }; void *workspace; @@ -553,7 +553,7 @@ unsigned bch2_bio_compress(struct bch_fs *c, compression_type = __bio_compress(c, dst, dst_len, src, src_len, - bch2_compression_decode(compression_opt)); + (union bch_compression_opt){ .value = compression_opt }); dst->bi_iter.bi_size = orig_dst; src->bi_iter.bi_size = orig_src; @@ -579,30 +579,25 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) if ((c->sb.features & f) == f) return 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); - if ((c->sb.features & f) == f) { - mutex_unlock(&c->sb_lock); + if ((c->sb.features & f) == f) return 0; - } ret = __bch2_fs_compress_init(c, c->sb.features|f); - if (ret) { - mutex_unlock(&c->sb_lock); + if (ret) return ret; - } 
c->disk_sb.sb->features[0] |= cpu_to_le64(f); bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return 0; } int bch2_check_set_has_compressed_data(struct bch_fs *c, unsigned compression_opt) { - unsigned compression_type = bch2_compression_decode(compression_opt).type; + unsigned int compression_type = ((union bch_compression_opt){ .value = compression_opt }) + .type; BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); @@ -683,7 +678,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) static u64 compression_opt_to_feature(unsigned v) { - unsigned type = bch2_compression_decode(v).type; + unsigned int type = ((union bch_compression_opt){ .value = v }).type; return BIT_ULL(bch2_compression_opt_to_feature[type]); } @@ -703,7 +698,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, { char *val = kstrdup(_val, GFP_KERNEL); char *p = val, *type_str, *level_str; - struct bch_compression_opt opt = { 0 }; + union bch_compression_opt opt = { 0 }; int ret; if (!val) @@ -736,7 +731,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, opt.level = level; } - *res = bch2_compression_encode(opt); + *res = opt.value; err: kfree(val); return ret; @@ -744,7 +739,7 @@ err: void bch2_compression_opt_to_text(struct printbuf *out, u64 v) { - struct bch_compression_opt opt = bch2_compression_decode(v); + union bch_compression_opt opt = { .value = v }; if (opt.type < BCH_COMPRESSION_OPT_NR) prt_str(out, bch2_compression_opts[opt.type]); diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index bec2f05bfd52..667ddb91d47a 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -10,41 +10,27 @@ static const unsigned __bch2_compression_opt_to_type[] = { #undef x }; -struct bch_compression_opt { - u8 type:4, - level:4; -}; - -static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) -{ - return (struct bch_compression_opt) { - .type = v & 15, - .level = 
v >> 4, +union bch_compression_opt { + u8 value; + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 type:4, level:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 level:4, type:4; +#endif }; -} +}; static inline bool bch2_compression_opt_valid(unsigned v) { - struct bch_compression_opt opt = __bch2_compression_decode(v); + union bch_compression_opt opt = { .value = v }; return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); } -static inline struct bch_compression_opt bch2_compression_decode(unsigned v) -{ - return bch2_compression_opt_valid(v) - ? __bch2_compression_decode(v) - : (struct bch_compression_opt) { 0 }; -} - -static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) -{ - return opt.type|(opt.level << 4); -} - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { - return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; + return __bch2_compression_opt_to_type[((union bch_compression_opt){ .value = v }).type]; } struct bch_write_op; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index e848e210a9bf..ccedc93fe0ef 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -115,7 +115,7 @@ static void trace_io_move_finish2(struct data_update *u, struct bkey_i *insert) { struct bch_fs *c = u->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_newline(&buf); @@ -131,7 +131,6 @@ static void trace_io_move_finish2(struct data_update *u, prt_newline(&buf); trace_io_move_finish(c, buf.buf); - printbuf_exit(&buf); } noinline_for_stack @@ -143,7 +142,7 @@ static void trace_io_move_fail2(struct data_update *m, { struct bch_fs *c = m->op.c; struct bkey_s_c old = bkey_i_to_s_c(m->k.k); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned rewrites_found = 0; if (!trace_io_move_fail_enabled()) @@ -187,7 +186,6 @@ static void trace_io_move_fail2(struct data_update *m, } trace_io_move_fail(c, buf.buf); - 
printbuf_exit(&buf); } noinline_for_stack @@ -196,7 +194,7 @@ static void trace_data_update2(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "\nold: "); bch2_bkey_val_to_text(&buf, c, old); @@ -206,7 +204,6 @@ static void trace_data_update2(struct data_update *m, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); trace_data_update(c, buf.buf); - printbuf_exit(&buf); } noinline_for_stack @@ -215,7 +212,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); @@ -227,7 +224,6 @@ static void trace_io_move_created_rebalance2(struct data_update *m, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); trace_io_move_created_rebalance(c, buf.buf); - printbuf_exit(&buf); this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]); } @@ -238,7 +234,7 @@ static int data_update_invalid_bkey(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_str(&buf, "about to insert invalid key in data update path"); @@ -254,7 +250,6 @@ static int data_update_invalid_bkey(struct data_update *m, bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, invalid_bkey); } @@ -499,7 +494,8 @@ out: int bch2_data_update_index_update(struct bch_write_op *op) { - return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); + CLASS(btree_trans, trans)(op->c); + return __bch2_data_update_index_update(trans, op); } void bch2_data_update_read_done(struct data_update *m) @@ -675,7 +671,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update if (!m->read_done) { prt_printf(out, 
"read:\n"); printbuf_indent_add(out, 2); - bch2_read_bio_to_text(out, &m->rbio); + bch2_read_bio_to_text(out, m->op.c, &m->rbio); } else { prt_printf(out, "write:\n"); printbuf_indent_add(out, 2); @@ -783,6 +779,9 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) darray_for_each(m->op.devs_have, i) __clear_bit(*i, devs.d); + CLASS(printbuf, buf)(); + + guard(printbuf_atomic)(&buf); guard(rcu)(); unsigned nr_replicas = 0, i; @@ -794,7 +793,11 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); - if (!dev_buckets_free(ca, usage, m->op.watermark)) + u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark); + + prt_printf(&buf, "%s=%llu ", ca->name, nr_free); + + if (!nr_free) continue; nr_replicas += ca->mi.durability; @@ -802,8 +805,10 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) break; } - if (!nr_replicas) + if (!nr_replicas) { + trace_data_update_done_no_rw_devs(c, buf.buf); return bch_err_throw(c, data_update_done_no_rw_devs); + } if (nr_replicas < m->op.nr_replicas) return bch_err_throw(c, insufficient_devices); return 0; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 07c2a0f73cc2..33cb94f70b19 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -141,7 +141,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) return; bch2_btree_node_io_lock(b); - mutex_lock(&c->verify_lock); + guard(mutex)(&c->verify_lock); if (!c->verify_ondisk) { c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL); @@ -172,14 +172,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) failed |= bch2_btree_verify_replica(c, b, p); if (failed) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf); - printbuf_exit(&buf); } out: - mutex_unlock(&c->verify_lock); 
bch2_btree_node_io_unlock(b); } @@ -367,17 +364,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: - bch2_trans_run(i->c, - for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - bch2_bkey_val_to_text(&i->buf, i->c, k); - prt_newline(&i->buf); - bch2_trans_unlock(trans); - i->from = bpos_successor(iter.pos); - bch2_debugfs_flush_buf(i); - }))) ?: + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + bch2_debugfs_flush_buf(i); + })) ?: i->ret; } @@ -404,15 +401,15 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - return bch2_trans_run(i->c, - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; + CLASS(btree_trans, trans)(i->c); + return for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? 
bpos_successor(b->key.k.p) + : b->key.k.p; - drop_locks_do(trans, bch2_debugfs_flush_buf(i)); - }))) ?: i->ret; + drop_locks_do(trans, bch2_debugfs_flush_buf(i)); + })) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -431,27 +428,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, i->size = size; i->ret = 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: - bch2_trans_run(i->c, - for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - struct btree_path_level *l = - &btree_iter_path(trans, &iter)->l[0]; - struct bkey_packed *_k = - bch2_btree_node_iter_peek(&l->iter, l->b); - - if (bpos_gt(l->b->key.k.p, i->prev_node)) { - bch2_btree_node_to_text(&i->buf, i->c, l->b); - i->prev_node = l->b->key.k.p; - } - - bch2_bfloat_to_text(&i->buf, l->b, _k); - bch2_trans_unlock(trans); - i->from = bpos_successor(iter.pos); - bch2_debugfs_flush_buf(i); - }))) ?: - i->ret; + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + struct btree_path_level *l = + &btree_iter_path(trans, &iter)->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); + + if (bpos_gt(l->b->key.k.p, i->prev_node)) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } + + bch2_bfloat_to_text(&i->buf, l->b, _k); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + bch2_debugfs_flush_buf(i); + })) ?: + i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -465,7 +462,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * struct btree *b) { if (!out->nr_tabstops) - printbuf_tabstop_push(out, 32); + printbuf_tabstop_push(out, 36); prt_printf(out, "%px ", b); bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); @@ -512,8 +509,8 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char 
__user *buf, if (ret) return ret; - i->buf.atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(&i->buf); struct bucket_table *tbl = rht_dereference_rcu(c->btree_cache.table.tbl, &c->btree_cache.table); @@ -528,7 +525,6 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, done = true; } } - --i->buf.atomic; } while (!done); if (i->buf.allocation_failure) @@ -771,7 +767,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]); printbuf_indent_add(&i->buf, 2); - mutex_lock(&s->lock); + guard(mutex)(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE @@ -802,8 +798,6 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, printbuf_indent_sub(&i->buf, 2); } - mutex_unlock(&s->lock); - printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index ccbb0127b724..dd60c47528da 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,6 +13,7 @@ #include <linux/dcache.h> +#if IS_ENABLED(CONFIG_UNICODE) int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, const struct qstr *str, struct qstr *out_cf) { @@ -34,6 +35,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; } +#endif static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { @@ -256,9 +258,11 @@ int bch2_dirent_init_name(struct bch_fs *c, if (ret) return ret; +#if IS_ENABLED(CONFIG_UNICODE) memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; + void *val_end = bkey_val_end(bkey_i_to_s(&dirent->k_i)); if (cf_name) { cf_len = cf_name->len; @@ -266,21 +270,20 @@ int bch2_dirent_init_name(struct bch_fs *c, memcpy(cf_out, cf_name->name, 
cf_name->len); } else { cf_len = utf8_casefold(hash_info->cf_encoding, name, - cf_out, - bkey_val_end(bkey_i_to_s(&dirent->k_i)) - (void *) cf_out); + cf_out, val_end - (void *) cf_out); if (cf_len <= 0) return cf_len; } - memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_len], 0, - bkey_val_bytes(&dirent->k) - - offsetof(struct bch_dirent, d_cf_name_block.d_names) - - name->len + cf_len); + void *name_end = &dirent->v.d_cf_name_block.d_names[name->len + cf_len]; + BUG_ON(name_end > val_end); + memset(name_end, 0, val_end - name_end); dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len); dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len); +#endif } unsigned u64s = dirent_val_u64s(name->len, cf_len); @@ -617,13 +620,12 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct bch_hash_info *hash_info, const struct qstr *name, subvol_inum *inum) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter = {}; int ret = lockrestart_do(trans, bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -683,8 +685,8 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct bkey_buf sk; bch2_bkey_buf_init(&sk); - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, POS(inum.inum, ctx->pos), POS(inum.inum, U64_MAX), inum.subvol, 0, k, ({ @@ -705,7 +707,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, continue; ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); - }))); + })); bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 1e17199cc5c7..efb58d2dcf68 100644 --- a/fs/bcachefs/dirent.h +++ 
b/fs/bcachefs/dirent.h @@ -23,8 +23,16 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +#if IS_ENABLED(CONFIG_UNICODE) int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, const struct qstr *, struct qstr *); +#else +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + return bch_err_throw(trans->c, no_casefolding_without_utf8); +} +#endif static inline int bch2_maybe_casefold(struct btree_trans *trans, const struct bch_hash_info *info, diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f7528cd69c73..219e37738aee 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -380,11 +380,10 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun accounting_pos_cmp, NULL); if (trace_accounting_mem_insert_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_accounting_to_text(&buf, c, a.s_c); trace_accounting_mem_insert(c, buf.buf); - printbuf_exit(&buf); } return 0; err: @@ -404,9 +403,9 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, return bch_err_throw(c, btree_insert_need_mark_replicas); percpu_up_read(&c->mark_lock); - percpu_down_write(&c->mark_lock); - int ret = __bch2_accounting_mem_insert(c, a); - percpu_up_write(&c->mark_lock); + int ret; + scoped_guard(percpu_write, &c->mark_lock) + ret = __bch2_accounting_mem_insert(c, a); percpu_down_read(&c->mark_lock); return ret; } @@ -438,7 +437,7 @@ void bch2_accounting_mem_gc(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); struct accounting_mem_entry *dst = acc->k.data; darray_for_each(acc->k, src) { @@ -453,7 +452,6 @@ void bch2_accounting_mem_gc(struct bch_fs *c) acc->k.nr = dst - acc->k.data; eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), 
accounting_pos_cmp, NULL); - percpu_up_write(&c->mark_lock); } /* @@ -471,7 +469,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) darray_init(usage); - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); darray_for_each(acc->k, i) { union { u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs, @@ -494,7 +492,6 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r)); usage->nr += replicas_usage_bytes(&u.r); } - percpu_up_read(&c->mark_lock); if (ret) darray_exit(usage); @@ -509,7 +506,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc darray_init(out_buf); - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); darray_for_each(acc->k, i) { struct disk_accounting_pos a_p; bpos_to_disk_accounting_pos(&a_p, i->pos); @@ -533,8 +530,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc out_buf->nr += bkey_bytes(&a_out->k); } - percpu_up_read(&c->mark_lock); - if (ret) darray_exit(out_buf); return ret; @@ -553,7 +548,7 @@ int bch2_gc_accounting_start(struct bch_fs *c) struct bch_accounting_mem *acc = &c->accounting; int ret = 0; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); darray_for_each(acc->k, e) { e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64), sizeof(u64), GFP_KERNEL); @@ -565,20 +560,18 @@ int bch2_gc_accounting_start(struct bch_fs *c) } acc->gc_running = !ret; - percpu_up_write(&c->mark_lock); - return ret; } int bch2_gc_accounting_done(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); struct bpos pos = POS_MIN; int ret = 0; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); while (1) { unsigned idx = eytzinger0_find_ge(acc->k.data, 
acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &pos); @@ -622,7 +615,8 @@ int bch2_gc_accounting_done(struct bch_fs *c) if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); - ret = commit_do(trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_skip_accounting_apply, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); percpu_down_write(&c->mark_lock); if (ret) @@ -637,20 +631,16 @@ int bch2_gc_accounting_done(struct bch_fs *c) bkey_i_to_s_c_accounting(&k_i.k), BCH_ACCOUNTING_normal, true); - preempt_disable(); + guard(preempt)(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); struct bch_fs_usage_base *src = &trans->fs_usage_delta; acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); - preempt_enable(); } } } } err: fsck_err: - percpu_up_write(&c->mark_lock); - printbuf_exit(&buf); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } @@ -662,11 +652,9 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) if (k.k->type != KEY_TYPE_accounting) return 0; - percpu_down_read(&c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), - BCH_ACCOUNTING_read, false); - percpu_up_read(&c->mark_lock); - return ret; + guard(percpu_read)(&c->mark_lock); + return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), + BCH_ACCOUNTING_read, false); } static int bch2_disk_accounting_validate_late(struct btree_trans *trans, @@ -674,7 +662,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, u64 *v, unsigned nr) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0, invalid_dev = -1; switch (acc->type) { @@ -723,7 +711,6 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, } fsck_err: - printbuf_exit(&buf); return ret; invalid_device: if (fsck_err(trans, accounting_to_invalid_device, @@ -751,8 +738,8 @@ invalid_device: 
int bch2_accounting_read(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); /* * We might run more than once if we rewind to start topology repair or @@ -761,13 +748,13 @@ int bch2_accounting_read(struct bch_fs *c) * * Instead, zero out any accounting we have: */ - percpu_down_write(&c->mark_lock); - darray_for_each(acc->k, e) - percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); - for_each_member_device(c, ca) - percpu_memset(ca->usage, 0, sizeof(*ca->usage)); - percpu_memset(c->usage, 0, sizeof(*c->usage)); - percpu_up_write(&c->mark_lock); + scoped_guard(percpu_write, &c->mark_lock) { + darray_for_each(acc->k, e) + percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); + for_each_member_device(c, ca) + percpu_memset(ca->usage, 0, sizeof(*ca->usage)); + percpu_memset(c->usage, 0, sizeof(*c->usage)); + } struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, @@ -798,7 +785,7 @@ int bch2_accounting_read(struct bch_fs *c) accounting_read_key(trans, k); })); if (ret) - goto err; + return ret; struct journal_keys *keys = &c->journal_keys; struct journal_key *dst = keys->data; @@ -837,14 +824,14 @@ int bch2_accounting_read(struct bch_fs *c) ret = accounting_read_key(trans, k); if (ret) - goto err; + return ret; } *dst++ = *i; } keys->gap = keys->nr = dst - keys->data; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); darray_for_each_reverse(acc->k, i) { struct disk_accounting_pos acc_k; @@ -876,60 +863,55 @@ int bch2_accounting_read(struct bch_fs *c) } if (ret) - goto fsck_err; + return ret; } eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); - preempt_disable(); - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + scoped_guard(preempt) { + struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); - 
for (unsigned i = 0; i < acc->k.nr; i++) { - struct disk_accounting_pos k; - bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); + for (unsigned i = 0; i < acc->k.nr; i++) { + struct disk_accounting_pos k; + bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); - switch (k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - usage->reserved += v[0] * k.persistent_reserved.nr_replicas; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - guard(rcu)(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); - if (ca) { - struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; - percpu_u64_set(&d->buckets, v[0]); - percpu_u64_set(&d->sectors, v[1]); - percpu_u64_set(&d->fragmented, v[2]); - - if (k.dev_data_type.data_type == BCH_DATA_sb || - k.dev_data_type.data_type == BCH_DATA_journal) - usage->hidden += v[0] * ca->mi.bucket_size; + switch (k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + usage->reserved += v[0] * k.persistent_reserved.nr_replicas; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + guard(rcu)(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); + if (ca) { + struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; + percpu_u64_set(&d->buckets, v[0]); + percpu_u64_set(&d->sectors, v[1]); + percpu_u64_set(&d->fragmented, v[2]); + + if (k.dev_data_type.data_type == BCH_DATA_sb || + k.dev_data_type.data_type == BCH_DATA_journal) + usage->hidden += v[0] * ca->mi.bucket_size; + } + break; + } } - break; - } } } - 
preempt_enable(); -fsck_err: - percpu_up_write(&c->mark_lock); -err: - printbuf_exit(&buf); - bch2_trans_put(trans); - bch_err_fn(c, ret); + return ret; } int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) { - return bch2_trans_run(c, - bch2_btree_write_buffer_flush_sync(trans) ?: + CLASS(btree_trans, trans)(c); + return bch2_btree_write_buffer_flush_sync(trans) ?: for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN, BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({ struct disk_accounting_pos acc; @@ -940,15 +922,16 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0) : 0; })) ?: - bch2_btree_write_buffer_flush_sync(trans)); + bch2_btree_write_buffer_flush_sync(trans); } int bch2_dev_usage_init(struct bch_dev *ca, bool gc) { struct bch_fs *c = ca->fs; + CLASS(btree_trans, trans)(c); u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - int ret = bch2_trans_do(c, ({ + int ret = lockrestart_do(trans, ({ bch2_disk_accounting_mod2(trans, gc, v, dev_data_type, .dev = ca->dev_idx, @@ -964,78 +947,77 @@ void bch2_verify_accounting_clean(struct bch_fs *c) bool mismatch = false; struct bch_fs_usage_base base = {}, base_inmem = {}; - bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_accounting, POS_MIN, - BTREE_ITER_all_snapshots, k, ({ - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); - unsigned nr = bch2_accounting_counters(k.k); + CLASS(btree_trans, trans)(c); + for_each_btree_key(trans, iter, + BTREE_ID_accounting, POS_MIN, + BTREE_ITER_all_snapshots, k, ({ + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); + unsigned nr = bch2_accounting_counters(k.k); - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); - if (acc_k.type >= 
BCH_DISK_ACCOUNTING_TYPE_NR) - break; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); - continue; - } + if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); + continue; + } - bch2_accounting_mem_read(c, k.k->p, v, nr); + bch2_accounting_mem_read(c, k.k->p, v, nr); - if (memcmp(a.v->d, v, nr * sizeof(u64))) { - struct printbuf buf = PRINTBUF; + if (memcmp(a.v->d, v, nr * sizeof(u64))) { + CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, " !="); - for (unsigned j = 0; j < nr; j++) - prt_printf(&buf, " %llu", v[j]); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, " !="); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", v[j]); - pr_err("%s", buf.buf); - printbuf_exit(&buf); - mismatch = true; - } + pr_err("%s", buf.buf); + mismatch = true; + } - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - { - guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ - struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (!ca) - continue; - - v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); - v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); - v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); - } + switch (acc_k.type) { + case 
BCH_DISK_ACCOUNTING_persistent_reserved: + base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + { + guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ + struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); + if (!ca) + continue; + + v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); + v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); + v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); + } - if (memcmp(a.v->d, v, 3 * sizeof(u64))) { - struct printbuf buf = PRINTBUF; + if (memcmp(a.v->d, v, 3 * sizeof(u64))) { + CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, " in mem"); - for (unsigned j = 0; j < nr; j++) - prt_printf(&buf, " %llu", v[j]); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, " in mem"); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", v[j]); - pr_err("dev accounting mismatch: %s", buf.buf); - printbuf_exit(&buf); - mismatch = true; - } + pr_err("dev accounting mismatch: %s", buf.buf); + mismatch = true; } + } + } - 0; - }))); + 0; + })); acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64)); diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index d61abebf3e0b..43f4b21d0aab 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -211,10 +211,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { - percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? 
BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); - percpu_up_read(&trans->c->mark_lock); - return ret; + guard(percpu_read)(&trans->c->mark_lock); + return bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); } static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *acc, @@ -236,13 +234,12 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem * static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, u64 *v, unsigned nr) { - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); struct bch_accounting_mem *acc = &c->accounting; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &p); bch2_accounting_mem_read_counters(acc, idx, v, nr, false); - percpu_up_read(&c->mark_lock); } static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index cde842ac1886..293e47268508 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -375,7 +375,7 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) { bch2_printbuf_make_room(out, 4096); - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); @@ -396,16 +396,13 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) next: prt_newline(out); } - - out->atomic--; } void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); - __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v), - --out->atomic; + __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v); } void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) @@ -471,14 +468,9 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const 
char *name) int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - int ret; - - mutex_lock(&c->sb_lock); - ret = __bch2_dev_group_set(c, ca, name) ?: + guard(mutex)(&c->sb_lock); + return __bch2_dev_group_set(c, ca, name) ?: bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - return ret; } int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, @@ -506,9 +498,8 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, return 0; } - mutex_lock(&c->sb_lock); - g = bch2_disk_path_find(&c->disk_sb, val); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + g = bch2_disk_path_find(&c->disk_sb, val); if (g >= 0) { *res = group_to_target(g); @@ -527,7 +518,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) prt_printf(out, "none"); return; case TARGET_DEV: { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_dev *ca = t.dev < c->sb.nr_devices ? rcu_dereference(c->devs[t.dev]) @@ -539,8 +530,6 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) prt_printf(out, "offline device %u", t.dev); else prt_printf(out, "invalid device %u", t.dev); - - out->atomic--; return; } case TARGET_GROUP: diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 687c3ba98095..8a8c94bd2580 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -197,8 +197,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bool parity = ptr_idx >= nr_data; enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; s64 sectors = parity ? 
le16_to_cpu(s.v->sectors) : 0; - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); struct bch_fs *c = trans->c; if (deleting) @@ -212,10 +211,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bch2_data_type_str(a->data_type), a->dirty_sectors, a->stripe, s.k->p.offset, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if (bch2_trans_inconsistent_on(parity && bch2_bucket_sectors_total(*a), trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u cached_sectors %u: data already in parity bucket\n%s", @@ -223,30 +220,24 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bch2_data_type_str(a->data_type), a->dirty_sectors, a->cached_sectors, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); } else { if (bch2_trans_inconsistent_on(a->stripe != s.k->p.offset || a->stripe_redundancy != s.v->nr_redundant, trans, "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe (got %u)\n%s", bucket.inode, bucket.offset, a->gen, a->stripe, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if (bch2_trans_inconsistent_on(a->data_type != data_type, trans, "bucket %llu:%llu gen %u data type %s: wrong data type when stripe, should be %s\n%s", bucket.inode, bucket.offset, a->gen, bch2_data_type_str(a->data_type), bch2_data_type_str(data_type), - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if 
(bch2_trans_inconsistent_on(parity && (a->dirty_sectors != -sectors || @@ -255,17 +246,15 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bucket.inode, bucket.offset, a->gen, a->dirty_sectors, a->cached_sectors, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); } if (sectors) { - ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, - a->gen, a->data_type, &a->dirty_sectors); + int ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, + a->gen, a->data_type, &a->dirty_sectors); if (ret) - goto err; + return ret; } if (!deleting) { @@ -277,9 +266,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, a->stripe_redundancy = 0; alloc_data_type_set(a, BCH_DATA_user); } -err: - printbuf_exit(&buf); - return ret; + + return 0; } static int mark_stripe_bucket(struct btree_trans *trans, @@ -289,14 +277,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx; - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); - struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); + CLASS(bch2_dev_tryget, ca)(c, ptr->dev); if (unlikely(!ca)) { if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite)) - ret = bch_err_throw(c, mark_stripe); - goto err; + return bch_err_throw(c, mark_stripe); + return 0; } struct bpos bucket = PTR_BUCKET_POS(ca, ptr); @@ -312,36 +299,32 @@ static int mark_stripe_bucket(struct btree_trans *trans, struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: + int ret = PTR_ERR_OR_ZERO(a) ?: __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?: bch2_bucket_backpointer_mod(trans, s.s_c, &bp, !(flags & BTREE_TRIGGER_overwrite)); if (ret) - goto err; + 
return ret; } if (flags & BTREE_TRIGGER_gc) { struct bucket *g = gc_bucket(ca, bucket.offset); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s", ptr->dev, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; - ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); + int ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); alloc_to_bucket(g, new); bucket_unlock(g); if (!ret) ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + return ret; /* gc path: propagate the error instead of falling through to return 0 */ } -err: - bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; + return 0; } static int mark_stripe_buckets(struct btree_trans *trans, @@ -630,16 +613,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - struct bch_dev *ca = bch2_dev_tryget(c, v->ptrs[i].dev); + CLASS(bch2_dev_tryget, ca)(c, v->ptrs[i].dev); if (ca) { - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); prt_str(&err, "stripe "); bch2_csum_err_msg(&err, v->csum_type, want, got); prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); bch_err_ratelimited(ca, "%s", err.buf); - printbuf_exit(&err); bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); } @@ -703,8 +685,8 @@ static void ec_block_endio(struct bio *bio) struct closure *cl = bio->bi_private; int rw = ec_bio->rw; unsigned ref = rw == READ - ? BCH_DEV_READ_REF_ec_block - : BCH_DEV_WRITE_REF_ec_block; + ?
(unsigned) BCH_DEV_READ_REF_ec_block + : (unsigned) BCH_DEV_WRITE_REF_ec_block; bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); @@ -741,8 +723,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, : BCH_DATA_parity; int rw = op_is_write(opf); unsigned ref = rw == READ - ? BCH_DEV_READ_REF_ec_block - : BCH_DEV_WRITE_REF_ec_block; + ? (unsigned) BCH_DEV_READ_REF_ec_block + : (unsigned) BCH_DEV_WRITE_REF_ec_block; struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref); if (!ca) { @@ -832,7 +814,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bch_stripe *v; unsigned i, offset; const char *msg = NULL; - struct printbuf msgbuf = PRINTBUF; + CLASS(printbuf, msgbuf)(); int ret = 0; closure_init_stack(&cl); @@ -894,7 +876,6 @@ err: bch2_bkey_val_to_text(&msgbuf, c, orig_k); bch_err_ratelimited(c, "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); - printbuf_exit(&msgbuf); ret = bch_err_throw(c, stripe_reconstruct); goto out; } @@ -936,31 +917,22 @@ static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx) static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx) { - bool ret = false; - - spin_lock(&c->ec_stripes_new_lock); - ret = __bch2_stripe_is_open(c, idx); - spin_unlock(&c->ec_stripes_new_lock); - - return ret; + guard(spinlock)(&c->ec_stripes_new_lock); + return __bch2_stripe_is_open(c, idx); } static bool bch2_try_open_stripe(struct bch_fs *c, struct ec_stripe_new *s, u64 idx) { - bool ret; - - spin_lock(&c->ec_stripes_new_lock); - ret = !__bch2_stripe_is_open(c, idx); + guard(spinlock)(&c->ec_stripes_new_lock); + bool ret = !__bch2_stripe_is_open(c, idx); if (ret) { unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new))); s->idx = idx; hlist_add_head(&s->hash, &c->ec_stripes_new[hash]); } - spin_unlock(&c->ec_stripes_new_lock); - return ret; } @@ -968,9 +940,8 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) 
{ BUG_ON(!s->idx); - spin_lock(&c->ec_stripes_new_lock); + guard(spinlock)(&c->ec_stripes_new_lock); hlist_del_init(&s->hash); - spin_unlock(&c->ec_stripes_new_lock); s->idx = 0; } @@ -1063,7 +1034,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, unsigned sectors = stripe_blockcount_get(v, i); if (!bch2_extent_ptr_eq(old->v.ptrs[i], new->v.ptrs[i]) && sectors) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "stripe changed nonempty block %u", i); prt_str(&buf, "\nold: "); @@ -1071,7 +1042,6 @@ static int ec_stripe_key_update(struct btree_trans *trans, prt_str(&buf, "\nnew: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new->k_i)); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); ret = -EINVAL; goto err; } @@ -1115,21 +1085,18 @@ static int ec_stripe_update_extent(struct btree_trans *trans, int ret, dev, block; if (bp.v->level) { - struct printbuf buf = PRINTBUF; struct btree_iter node_iter; - struct btree *b; - - b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); + struct btree *b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); bch2_trans_iter_exit(trans, &node_iter); if (!b) return 0; + CLASS(printbuf, buf)(); prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); bch2_bkey_val_to_text(&buf, c, bp.s_c); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, erasure_coding_found_btree_node); } @@ -1194,7 +1161,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b struct bch_extent_ptr ptr = v->ptrs[block]; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, ptr.dev); if (!ca) return bch_err_throw(c, ENOENT_dev_not_found); @@ -1225,28 +1192,26 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b })); bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); return ret; } static int ec_stripe_update_extents(struct 
bch_fs *c, struct ec_stripe_buf *s) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; unsigned nr_data = v->nr_blocks - v->nr_redundant; int ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) - goto err; + return ret; for (unsigned i = 0; i < nr_data; i++) { ret = ec_stripe_update_bucket(trans, s, i); if (ret) - break; + return ret; } -err: - bch2_trans_put(trans); - return ret; + + return 0; } static void zero_out_rest_of_ec_bucket(struct bch_fs *c, @@ -1385,9 +1350,8 @@ err: } } - mutex_lock(&c->ec_stripe_new_lock); - list_del(&s->list); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_del(&s->list); wake_up(&c->ec_stripe_new_wait); ec_stripe_buf_exit(&s->existing_stripe); @@ -1401,15 +1365,11 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c) { struct ec_stripe_new *s; - mutex_lock(&c->ec_stripe_new_lock); + guard(mutex)(&c->ec_stripe_new_lock); list_for_each_entry(s, &c->ec_stripe_new_list, list) if (!atomic_read(&s->ref[STRIPE_REF_io])) - goto out; - s = NULL; -out: - mutex_unlock(&c->ec_stripe_new_lock); - - return s; + return s; + return NULL; } static void ec_stripe_create_work(struct work_struct *work) @@ -1443,9 +1403,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h h->s = NULL; s->pending = true; - mutex_lock(&c->ec_stripe_new_lock); - list_add(&s->list, &c->ec_stripe_new_list); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_add(&s->list, &c->ec_stripe_new_list); ec_stripe_new_put(c, s, STRIPE_REF_io); } @@ -2101,6 +2060,8 @@ allocated: BUG_ON(trans->restarted); return h; err: + if (waiting) + closure_wake_up(&c->freelist_wait); bch2_ec_stripe_head_put(c, h); return ERR_PTR(ret); } @@ -2199,13 +2160,13 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s int bch2_dev_remove_stripes(struct 
bch_fs *c, unsigned dev_idx, unsigned flags) { - int ret = bch2_trans_run(c, - for_each_btree_key_max_commit(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); - }))); + })); bch_err_fn(c, ret); return ret; } @@ -2215,33 +2176,28 @@ int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) { struct ec_stripe_head *h; - struct open_bucket *ob; - unsigned i; - mutex_lock(&c->ec_stripe_head_lock); + guard(mutex)(&c->ec_stripe_head_lock); list_for_each_entry(h, &c->ec_stripe_head_list, list) { - mutex_lock(&h->lock); + guard(mutex)(&h->lock); if (!h->s) - goto unlock; + continue; if (!ca) goto found; - for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { + for (unsigned i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { if (!h->s->blocks[i]) continue; - ob = c->open_buckets + h->s->blocks[i]; + struct open_bucket *ob = c->open_buckets + h->s->blocks[i]; if (ob->dev == ca->dev_idx) goto found; } - goto unlock; + continue; found: ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes); -unlock: - mutex_unlock(&h->lock); } - mutex_unlock(&c->ec_stripe_head_lock); } void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) @@ -2258,11 +2214,8 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c) { sched_annotate_sleep(); - mutex_lock(&c->ec_stripe_new_lock); - bool ret = list_empty(&c->ec_stripe_new_list); - mutex_unlock(&c->ec_stripe_new_lock); - - return ret; + guard(mutex)(&c->ec_stripe_new_lock); + return list_empty(&c->ec_stripe_new_list); } void bch2_fs_ec_flush(struct bch_fs *c) @@ -2299,41 +2252,40 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) struct ec_stripe_head *h; struct ec_stripe_new *s; - 
mutex_lock(&c->ec_stripe_head_lock); - list_for_each_entry(h, &c->ec_stripe_head_list, list) { - prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", - h->disk_label, h->algo, h->redundancy, - bch2_watermarks[h->watermark], - h->nr_created); + scoped_guard(mutex, &c->ec_stripe_head_lock) + list_for_each_entry(h, &c->ec_stripe_head_list, list) { + prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", + h->disk_label, h->algo, h->redundancy, + bch2_watermarks[h->watermark], + h->nr_created); - if (h->s) - bch2_new_stripe_to_text(out, c, h->s); - } - mutex_unlock(&c->ec_stripe_head_lock); + if (h->s) + bch2_new_stripe_to_text(out, c, h->s); + } prt_printf(out, "in flight:\n"); - mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) - bch2_new_stripe_to_text(out, c, s); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); } void bch2_fs_ec_exit(struct bch_fs *c) { - struct ec_stripe_head *h; - unsigned i; while (1) { - mutex_lock(&c->ec_stripe_head_lock); - h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); - mutex_unlock(&c->ec_stripe_head_lock); + struct ec_stripe_head *h; + + scoped_guard(mutex, &c->ec_stripe_head_lock) + h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); if (!h) break; if (h->s) { - for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) + for (unsigned i = 0; + i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; + i++) BUG_ON(h->s->blocks[i]); kfree(h->s); @@ -2386,20 +2338,18 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans, return 0; } -int bch2_check_stripe_to_lru_refs(struct bch_fs *c) +int bch2_check_stripe_to_lru_refs(struct btree_trans *trans) { struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - 
int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_stripe_to_lru_ref(trans, k, &last_flushed))); + bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)); - bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); + bch2_bkey_buf_exit(&last_flushed, trans->c); + bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 548048adf0d5..e807e7027d7a 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -304,6 +304,6 @@ void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_init_early(struct bch_fs *); int bch2_fs_ec_init(struct bch_fs *); -int bch2_check_stripe_to_lru_refs(struct bch_fs *); +int bch2_check_stripe_to_lru_refs(struct btree_trans *); #endif /* _BCACHEFS_EC_H */ diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c index 56ab430f209f..2ded74135977 100644 --- a/fs/bcachefs/enumerated_ref.c +++ b/fs/bcachefs/enumerated_ref.c @@ -75,13 +75,11 @@ void enumerated_ref_stop(struct enumerated_ref *ref, { enumerated_ref_stop_async(ref); while (!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n"); prt_str(&buf, "Outstanding refs:\n"); enumerated_ref_to_text(&buf, ref, names); printk(KERN_ERR "%s", buf.buf); - printbuf_exit(&buf); } } diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index c39cf304c681..86264b8c343c 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -26,7 +26,8 @@ const char *bch2_err_str(int err) err = abs(err); - BUG_ON(err >= BCH_ERR_MAX); + if (err >= BCH_ERR_MAX) + return "(Invalid error)"; if (err >= BCH_ERR_START) errstr = bch2_errcode_strs[err - BCH_ERR_START]; diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 
71649b4164b8..32a286b3a74e 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -42,15 +42,14 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) bool bch2_inconsistent_error(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); printbuf_indent_add_nextline(&buf, 2); bool ret = __bch2_inconsistent_error(c, &buf); if (ret) bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); return ret; } @@ -58,8 +57,8 @@ __printf(3, 0) static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, const char *fmt, va_list args) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); bch2_log_msg_start(c, &buf); @@ -70,8 +69,6 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - - printbuf_exit(&buf); return ret; } @@ -109,8 +106,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); va_list args; @@ -120,8 +116,6 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) 
int ret = __bch2_topology_error(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - - printbuf_exit(&buf); return ret; } @@ -138,18 +132,18 @@ void bch2_io_error_work(struct work_struct *work) /* XXX: if it's reads or checksums that are failing, set it to failed */ - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); if (write_errors_start && time_after(jiffies, write_errors_start + c->opts.write_error_timeout * HZ)) { if (ca->mi.state >= BCH_MEMBER_STATE_ro) - goto out; + return; bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, BCH_FORCE_IF_DEGRADED); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); __bch2_log_msg_start(ca->name, &buf); prt_printf(&buf, "writes erroring for %u seconds, setting %s ro", @@ -159,10 +153,7 @@ void bch2_io_error_work(struct work_struct *work) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } -out: - up_write(&c->state_lock); } void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) @@ -382,11 +373,10 @@ bool __bch2_count_fsck_err(struct bch_fs *c, { bch2_sb_error_count(c, id); - mutex_lock(&c->fsck_error_msgs_lock); bool print = true, repeat = false, suppress = false; - count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); - mutex_unlock(&c->fsck_error_msgs_lock); + scoped_guard(mutex, &c->fsck_error_msgs_lock) + count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); if (suppress) prt_printf(msg, "Ratelimiting new instances of previous error\n"); @@ -444,7 +434,8 @@ int __bch2_fsck_err(struct bch_fs *c, const char *fmt, ...) 
{ va_list args; - struct printbuf buf = PRINTBUF, *out = &buf; + CLASS(printbuf, buf)(); + struct printbuf *out = &buf; int ret = 0; const char *action_orig = "fix?", *action = action_orig; @@ -637,7 +628,9 @@ err: * log_fsck_err()s: that would require us to track for every error type * which recovery pass corrects it, to get the fsck exit status correct: */ - if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + /* nothing */ + } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { set_bit(BCH_FS_errors_fixed, &c->flags); } else { set_bit(BCH_FS_errors_not_fixed, &c->flags); @@ -646,7 +639,6 @@ err: if (action != action_orig) kfree(action); - printbuf_exit(&buf); BUG_ON(!ret); return ret; @@ -678,7 +670,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) fsck_flags |= fsck_flags_extra[err]; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "invalid bkey in %s", bch2_bkey_validate_contexts[from.from]); @@ -699,7 +691,6 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, va_end(args); int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); - printbuf_exit(&buf); return ret; } @@ -707,7 +698,7 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_msgs_lock); + guard(mutex)(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (print && s->ratelimited && s->last_msg) @@ -717,8 +708,6 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) kfree(s->last_msg); kfree(s); } - - mutex_unlock(&c->fsck_error_msgs_lock); } void bch2_flush_fsck_errs(struct bch_fs *c) @@ -752,7 +741,8 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *o void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum, u64 offset) { - bch2_trans_do(c, 
bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); } int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, @@ -769,5 +759,6 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out, struct bpos pos) { - bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 83cbd77dcb9c..b36ecfc0ab9d 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -63,15 +63,14 @@ void bch2_io_failures_to_text(struct printbuf *out, ((!!f->failed_ec) << 3); bch2_printbuf_make_room(out, 1024); - out->atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(out); struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev); if (ca) prt_str(out, ca->name); else prt_printf(out, "(invalid device %u)", f->dev); } - --out->atomic; prt_char(out, ' '); @@ -1023,6 +1022,18 @@ bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) return false; } +bool bch2_bkey_in_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + guard(rcu)(); + bkey_for_each_ptr(ptrs, ptr) + if (!bch2_dev_in_target(c, ptr->dev, target)) + return false; + + return true; +} + bool bch2_bkey_matches_ptr(struct bch_fs *c, struct bkey_s_c k, struct bch_extent_ptr m, u64 offset) { @@ -1225,7 +1236,7 @@ restart_drop_ptrs: void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); if (!ca) { @@ -1250,7 +1261,6 @@ void 
bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc else if (stale) prt_printf(out, " invalid"); } - --out->atomic; } void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc) @@ -1512,7 +1522,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { - struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + union bch_compression_opt opt = { .value = r->compression }; prt_printf(err, "invalid compression opt %u:%u", opt.type, opt.level); return bch_err_throw(c, invalid_bkey); diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index b8590e51b76e..f212f91c278d 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -615,6 +615,7 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig } bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); +bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned); void bch2_bkey_extent_entry_drop(struct bkey_i *, union bch_extent_entry *); diff --git a/fs/bcachefs/fast_list.c b/fs/bcachefs/fast_list.c index b52f264318d8..6be2a45be1dd 100644 --- a/fs/bcachefs/fast_list.c +++ b/fs/bcachefs/fast_list.c @@ -138,8 +138,21 @@ void fast_list_remove(struct fast_list *l, unsigned idx) void fast_list_exit(struct fast_list *l) { - /* XXX: warn if list isn't empty */ - free_percpu(l->buffer); + if (l->buffer) { + int cpu; + for_each_possible_cpu(cpu) { + struct fast_list_pcpu *lp = per_cpu_ptr(l->buffer, cpu); + + while (lp->nr) + ida_free(&l->slots_allocated, lp->entries[--lp->nr]); + } + + free_percpu(l->buffer); + } + + WARN(ida_find_first(&l->slots_allocated) >= 0, + "fast_list still has objects on exit\n"); + ida_destroy(&l->slots_allocated); genradix_free(&l->items); } diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 
4e82dfa6c03f..f2389054693a 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -254,12 +254,11 @@ err: bch2_trans_iter_exit(trans, &iter); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9)); prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); rbio->bio.bi_status = BLK_STS_IOERR; bio_endio(&rbio->bio); @@ -425,27 +424,23 @@ static void bch2_writepage_io_done(struct bch_write_op *op) set_bit(EI_INODE_ERROR, &io->inode->ei_flags); bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; - mapping_set_error(fi.folio->mapping, -EIO); - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); + struct bch_folio *s = __bch2_folio(fi.folio); + guard(spinlock)(&s->lock); + for (i = 0; i < folio_sectors(fi.folio); i++) s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); } } if (io->op.flags & BCH_WRITE_wrote_data_inline) { bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; + struct bch_folio *s = __bch2_folio(fi.folio); + guard(spinlock)(&s->lock); - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); for (i = 0; i < folio_sectors(fi.folio); i++) s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); } } @@ -571,30 +566,30 @@ do_io: BUG_ON(ret); /* Before unlocking the page, get copy of reservations: */ - spin_lock(&s->lock); - memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); + scoped_guard(spinlock, &s->lock) { + memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; - nr_replicas_this_write = - min_t(unsigned, nr_replicas_this_write, - s->s[i].nr_replicas + - s->s[i].replicas_reserved); - } + nr_replicas_this_write = + min_t(unsigned, nr_replicas_this_write, + 
s->s[i].nr_replicas + + s->s[i].replicas_reserved); + } - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; - s->s[i].nr_replicas = w->opts.compression - ? 0 : nr_replicas_this_write; + s->s[i].nr_replicas = w->opts.compression + ? 0 : nr_replicas_this_write; - s->s[i].replicas_reserved = 0; - bch2_folio_sector_set(folio, s, i, SECTOR_allocated); + s->s[i].replicas_reserved = 0; + bch2_folio_sector_set(folio, s, i, SECTOR_allocated); + } } - spin_unlock(&s->lock); BUG_ON(atomic_read(&s->write_count)); atomic_set(&s->write_count, 1); @@ -780,10 +775,9 @@ int bch2_write_end(struct file *file, struct address_space *mapping, copied = 0; } - spin_lock(&inode->v.i_lock); - if (pos + copied > inode->v.i_size) - i_size_write(&inode->v, pos + copied); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + if (pos + copied > inode->v.i_size) + i_size_write(&inode->v, pos + copied); if (copied) { if (!folio_test_uptodate(folio)) @@ -942,10 +936,9 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, end = pos + copied; - spin_lock(&inode->v.i_lock); - if (end > inode->v.i_size) - i_size_write(&inode->v, end); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + if (end > inode->v.i_size) + i_size_write(&inode->v, end); f_pos = pos; f_offset = pos - folio_pos(darray_first(fs)); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 1f5154d9676b..73d44875faf2 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -252,7 +252,7 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, u64 offset, u64 size, unsigned nr_replicas, bool compressed) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; u64 end = offset + size; @@ -285,7 +285,6 @@ retry: err: if 
(bch2_err_matches(err, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_put(trans); return err ? false : ret; } @@ -428,17 +427,15 @@ static __always_inline void bch2_dio_write_end(struct dio_write *dio) dio->written += dio->op.written; if (dio->extending) { - spin_lock(&inode->v.i_lock); + guard(spinlock)(&inode->v.i_lock); if (req->ki_pos > inode->v.i_size) i_size_write(&inode->v, req->ki_pos); - spin_unlock(&inode->v.i_lock); } if (dio->op.i_sectors_delta || dio->quota_res.sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); __bch2_quota_reservation_put(c, inode, &dio->quota_res); - mutex_unlock(&inode->ei_quota_lock); } bio_release_pages(bio, false); diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index c2cc405822f2..2a6705186c44 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -125,11 +125,9 @@ folio_sector_reserve(enum bch_folio_sector_state state) /* for newly allocated folios: */ struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) { - struct bch_folio *s; - - s = kzalloc(sizeof(*s) + - sizeof(struct bch_folio_sector) * - folio_sectors(folio), gfp); + struct bch_folio *s = kzalloc(sizeof(*s) + + sizeof(struct bch_folio_sector) * + folio_sectors(folio), gfp); if (!s) return NULL; @@ -162,7 +160,7 @@ static void __bch2_folio_set(struct folio *folio, BUG_ON(pg_offset >= sectors); BUG_ON(pg_offset + pg_len > sectors); - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (i = pg_offset; i < pg_offset + pg_len; i++) { s->s[i].nr_replicas = nr_ptrs; @@ -171,8 +169,6 @@ static void __bch2_folio_set(struct folio *folio, if (i == sectors) s->uptodate = true; - - spin_unlock(&s->lock); } /* @@ -276,10 +272,9 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode, s = bch2_folio(folio); if (s) { - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (j = 
folio_offset; j < folio_offset + folio_len; j++) s->s[j].nr_replicas = 0; - spin_unlock(&s->lock); } folio_unlock(folio); @@ -330,13 +325,12 @@ int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, unsigned folio_offset = max(*start, folio_start) - folio_start; unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { i_sectors_delta -= s->s[j].state == SECTOR_dirty; bch2_folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); } - spin_unlock(&s->lock); } folio_unlock(folio); @@ -529,29 +523,26 @@ void bch2_set_folio_dirty(struct bch_fs *c, BUG_ON(!s->uptodate); - spin_lock(&s->lock); - - for (i = round_down(offset, block_bytes(c)) >> 9; - i < round_up(offset + len, block_bytes(c)) >> 9; - i++) { - unsigned sectors = sectors_to_reserve(&s->s[i], - res->disk.nr_replicas); - - /* - * This can happen if we race with the error path in - * bch2_writepage_io_done(): - */ - sectors = min_t(unsigned, sectors, res->disk.sectors); + scoped_guard(spinlock, &s->lock) + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; + i++) { + unsigned sectors = sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); - s->s[i].replicas_reserved += sectors; - res->disk.sectors -= sectors; + /* + * This can happen if we race with the error path in + * bch2_writepage_io_done(): + */ + sectors = min_t(unsigned, sectors, res->disk.sectors); - dirty_sectors += s->s[i].state == SECTOR_unallocated; + s->s[i].replicas_reserved += sectors; + res->disk.sectors -= sectors; - bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); - } + dirty_sectors += s->s[i].state == SECTOR_unallocated; - spin_unlock(&s->lock); + bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); + } bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors); diff --git a/fs/bcachefs/fs-io.c 
b/fs/bcachefs/fs-io.c index dc5f713e209c..93ad33f0953a 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -148,7 +148,7 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, sectors, @@ -157,7 +157,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (sectors < 0) sectors = -inode->v.i_blocks; @@ -187,7 +186,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, u64 *seq) { - struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; struct btree_iter iter; int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); @@ -197,6 +195,7 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in u64 cur_seq = journal_cur_seq(&trans->c->journal); *seq = min(cur_seq, u.bi_journal_seq); + CLASS(printbuf, buf)(); if (fsck_err_on(u.bi_journal_seq > cur_seq, trans, inode_journal_seq_in_future, "inode journal seq in future (currently at %llu)\n%s", @@ -208,7 +207,6 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in } fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -227,7 +225,7 @@ static int bch2_flush_inode(struct bch_fs *c, u64 seq; int ret = bch2_trans_commit_do(c, NULL, NULL, 0, - bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: + bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: bch2_journal_flush_seq(&c->journal, seq, 
TASK_INTERRUPTIBLE) ?: bch2_inode_flush_nocow_writes(c, inode); enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync); @@ -267,11 +265,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, struct bpos start, struct bpos end) { - return bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, subvol, 0, k, ({ - bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); - }))); + bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); + })); } static int __bch2_truncate_folio(struct bch_inode_info *inode, @@ -521,7 +519,7 @@ int bchfs_truncate(struct mnt_idmap *idmap, if (unlikely(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal))) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", @@ -531,7 +529,6 @@ int bchfs_truncate(struct mnt_idmap *idmap, bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } ret = bch2_setattr_nonsize(idmap, inode, iattr); @@ -559,11 +556,10 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l u64 block_start = round_up(offset, block_bytes(c)); u64 block_end = round_down(end, block_bytes(c)); bool truncated_last_page; - int ret = 0; - ret = bch2_truncate_folios(inode, offset, end); + int ret = bch2_truncate_folios(inode, offset, end); if (unlikely(ret < 0)) - goto err; + return ret; truncated_last_page = ret; @@ -576,19 +572,18 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l block_start >> 9, block_end >> 9, &i_sectors_delta); bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + + if (ret) + return ret; } - mutex_lock(&inode->ei_update_lock); - if (end >= 
inode->v.i_size && !truncated_last_page) { - ret = bch2_write_inode_size(c, inode, inode->v.i_size, - ATTR_MTIME|ATTR_CTIME); - } else { - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + guard(mutex)(&inode->ei_update_lock); + if (end >= inode->v.i_size && !truncated_last_page) + return bch2_write_inode_size(c, inode, inode->v.i_size, + ATTR_MTIME|ATTR_CTIME); + else + return bch2_write_inode(c, inode, inode_update_times_fn, NULL, ATTR_MTIME|ATTR_CTIME); - } - mutex_unlock(&inode->ei_update_lock); -err: - return ret; } static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, @@ -631,7 +626,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, u64 start_sector, u64 end_sector) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bpos end_pos = POS(inode->v.i_ino, end_sector); struct bch_io_opts opts; @@ -753,7 +748,6 @@ bkey_err: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -802,13 +796,11 @@ static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode, if (end >= inode->v.i_size && (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || !(mode & FALLOC_FL_KEEP_SIZE))) { - spin_lock(&inode->v.i_lock); - i_size_write(&inode->v, end); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + i_size_write(&inode->v, end); - mutex_lock(&inode->ei_update_lock); - ret2 = bch2_write_inode_size(c, inode, end, 0); - mutex_unlock(&inode->ei_update_lock); + scoped_guard(mutex, &inode->ei_update_lock) + ret2 = bch2_write_inode_size(c, inode, end, 0); } return ret ?: ret2; @@ -861,8 +853,8 @@ static int quota_reserve_range(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; u64 sectors = end - start; - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, + CLASS(btree_trans, trans)(c); + int 
ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, start), POS(inode->v.i_ino, end - 1), @@ -875,7 +867,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, } 0; - }))); + })); return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true); } @@ -955,10 +947,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, bch2_i_sectors_acct(c, dst, &quota_res, i_sectors_delta); - spin_lock(&dst->v.i_lock); - if (pos_dst + ret > dst->v.i_size) - i_size_write(&dst->v, pos_dst + ret); - spin_unlock(&dst->v.i_lock); + scoped_guard(spinlock, &dst->v.i_lock) + if (pos_dst + ret > dst->v.i_size) + i_size_write(&dst->v, pos_dst + ret); if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || IS_SYNC(file_inode(file_dst))) @@ -1020,38 +1011,38 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, U64_MAX), inum.subvol, BTREE_ITER_slots, k, ({ - if (k.k->p.inode != inode->v.i_ino || - !bkey_extent_is_data(k.k)) { - loff_t start_offset = k.k->p.inode == inode->v.i_ino - ? max(offset, bkey_start_offset(k.k) << 9) - : offset; - loff_t end_offset = k.k->p.inode == inode->v.i_ino - ? MAX_LFS_FILESIZE - : k.k->p.offset << 9; - - /* - * Found a hole in the btree, now make sure it's - * a hole in the pagecache. 
We might have to - * keep searching if this hole is entirely dirty - * in the page cache: - */ - bch2_trans_unlock(trans); - loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, - start_offset, end_offset, 0, false); - if (pagecache_hole < end_offset) { - next_hole = pagecache_hole; - break; - } - } else { - offset = max(offset, bkey_start_offset(k.k) << 9); + if (k.k->p.inode != inode->v.i_ino || + !bkey_extent_is_data(k.k)) { + loff_t start_offset = k.k->p.inode == inode->v.i_ino + ? max(offset, bkey_start_offset(k.k) << 9) + : offset; + loff_t end_offset = k.k->p.inode == inode->v.i_ino + ? MAX_LFS_FILESIZE + : k.k->p.offset << 9; + + /* + * Found a hole in the btree, now make sure it's + * a hole in the pagecache. We might have to + * keep searching if this hole is entirely dirty + * in the page cache: + */ + bch2_trans_unlock(trans); + loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, + start_offset, end_offset, 0, false); + if (pagecache_hole < end_offset) { + next_hole = pagecache_hole; + break; } - 0; - }))); + } else { + offset = max(offset, bkey_start_offset(k.k) << 9); + } + 0; + })); if (ret) return ret; diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index ca70346e68dc..d229f7225da1 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -77,9 +77,8 @@ static inline void bch2_quota_reservation_put(struct bch_fs *c, struct quota_res *res) { if (res->sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_quota_reservation_put(c, inode, res); - mutex_unlock(&inode->ei_quota_lock); } } @@ -94,16 +93,15 @@ static inline int bch2_quota_reservation_add(struct bch_fs *c, if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) return 0; - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); - if (likely(!ret)) { - inode->ei_quota_reserved += sectors; - res->sectors += sectors; - } - mutex_unlock(&inode->ei_quota_lock); + if (ret) + return ret; - return ret; + inode->ei_quota_reserved += sectors; + res->sectors += sectors; + return 0; } #else @@ -134,9 +132,8 @@ static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info * struct quota_res *quota_res, s64 sectors) { if (sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_i_sectors_acct(c, inode, quota_res, sectors); - mutex_unlock(&inode->ei_quota_lock); } } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 4e72e654da96..8b9d3c7d1f57 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -111,9 +111,8 @@ static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); - mutex_lock(&c->sb_lock); - memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); len = strnlen(label, BCH_SB_LABEL_SIZE); if (len == BCH_SB_LABEL_SIZE) { @@ -152,10 +151,10 @@ static int bch2_ioc_setlabel(struct bch_fs *c, if (ret) return ret; - mutex_lock(&c->sb_lock); - strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - ret = bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); + ret = bch2_write_super(c); + } mnt_drop_write_file(file); return ret; @@ -172,7 +171,7 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) if (get_user(flags, arg)) return -EFAULT; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "shutdown by ioctl type %u", flags); @@ -193,13 +192,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) 
bch2_fs_emergency_read_only2(c, &buf); break; default: - ret = -EINVAL; - goto noprint; + return -EINVAL; } bch2_print_str(c, KERN_ERR, buf.buf); -noprint: - printbuf_exit(&buf); return ret; } @@ -234,9 +230,8 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { /* sync_inodes_sb enforce s_umount is locked */ - down_read(&c->vfs_sb->s_umount); + guard(rwsem_read)(&c->vfs_sb->s_umount); sync_inodes_sb(c->vfs_sb); - up_read(&c->vfs_sb->s_umount); } if (arg.src_ptr) { @@ -301,12 +296,10 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, !arg.src_ptr) snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; - down_write(&c->snapshot_create_lock); - inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), - dst_dentry, arg.mode|S_IFDIR, - 0, snapshot_src, create_flags); - up_write(&c->snapshot_create_lock); - + scoped_guard(rwsem_write, &c->snapshot_create_lock) + inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), + dst_dentry, arg.mode|S_IFDIR, + 0, snapshot_src, create_flags); error = PTR_ERR_OR_ZERO(inode); if (error) goto err3; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3b0783f117ae..2789b30add10 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -106,14 +106,13 @@ int __must_check bch2_write_inode(struct bch_fs *c, inode_set_fn set, void *p, unsigned fields) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = {}; - struct bch_inode_unpacked inode_u; - int ret; + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); - ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); + struct btree_iter iter = {}; + struct bch_inode_unpacked inode_u; + int ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); if (ret) goto err; @@ -156,7 +155,6 @@ err: inode_inum(inode).subvol, inode_inum(inode).inum); - bch2_trans_put(trans); return ret < 0 ? 
ret : 0; } @@ -166,32 +164,27 @@ int bch2_fs_quota_transfer(struct bch_fs *c, unsigned qtypes, enum quota_acct_mode mode) { - unsigned i; - int ret; - qtypes &= enabled_qtypes(c); - for (i = 0; i < QTYP_NR; i++) + for (unsigned i = 0; i < QTYP_NR; i++) if (new_qid.q[i] == inode->ei_qid.q[i]) qtypes &= ~(1U << i); if (!qtypes) return 0; - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); - ret = bch2_quota_transfer(c, qtypes, new_qid, + int ret = bch2_quota_transfer(c, qtypes, new_qid, inode->ei_qid, inode->v.i_blocks + inode->ei_quota_reserved, mode); if (!ret) - for (i = 0; i < QTYP_NR; i++) + for (unsigned i = 0; i < QTYP_NR; i++) if (qtypes & (1 << i)) inode->ei_qid.q[i] = new_qid.q[i]; - mutex_unlock(&inode->ei_quota_lock); - return ret; } @@ -241,7 +234,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) struct bch_fs *c = trans->c; struct rhltable *ht = &c->vfs_inodes_by_inum_table; u64 inum = p.offset; - DARRAY(u32) subvols; + CLASS(darray_u32, subvols)(); int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) @@ -280,7 +273,7 @@ restart: rcu_read_unlock(); ret = darray_make_room(&subvols, 1); if (ret) - goto err; + return ret; subvols.nr = 0; goto restart_from_top; } @@ -303,14 +296,13 @@ restart: u32 snap; ret = bch2_subvolume_get_snapshot(trans, *i, &snap); if (ret) - goto err; + return ret; ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot); if (ret) break; } -err: - darray_exit(&subvols); + return ret; } @@ -367,9 +359,9 @@ repeat: static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode) { - spin_lock(&inode->v.i_lock); - bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); - spin_unlock(&inode->v.i_lock); + bool remove; + scoped_guard(spinlock, &inode->v.i_lock) + remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); if (remove) { int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, @@ -430,9 +422,8 @@ retry: inode_sb_list_add(&inode->v); - 
mutex_lock(&c->vfs_inodes_lock); - list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); - mutex_unlock(&c->vfs_inodes_lock); + scoped_guard(mutex, &c->vfs_inodes_lock) + list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); return inode; } } @@ -514,7 +505,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (inode) return &inode->v; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bch_inode_unpacked inode_u; struct bch_subvolume subvol; @@ -522,7 +513,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - bch2_trans_put(trans); return ret ? ERR_PTR(ret) : &inode->v; } @@ -534,7 +524,6 @@ __bch2_create(struct mnt_idmap *idmap, unsigned flags) { struct bch_fs *c = dir->v.i_sb->s_fs_info; - struct btree_trans *trans; struct bch_inode_unpacked dir_u; struct bch_inode_info *inode; struct bch_inode_unpacked inode_u; @@ -555,18 +544,23 @@ __bch2_create(struct mnt_idmap *idmap, if (ret) return ERR_PTR(ret); #endif + inode = __bch2_new_inode(c, GFP_NOFS); if (unlikely(!inode)) { - inode = ERR_PTR(-ENOMEM); - goto err; + posix_acl_release(default_acl); + posix_acl_release(acl); + return ERR_PTR(-ENOMEM); } bch2_inode_init_early(c, &inode_u); if (!(flags & BCH_CREATE_TMPFILE)) mutex_lock(&dir->ei_update_lock); - - trans = bch2_trans_get(c); + /* + * posix_acl_create() calls get_acl -> btree transaction, don't start + * ours until after, ei->update_lock must also be taken first: + */ + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); @@ -625,7 +619,6 @@ err_before_quota: * restart here. 
*/ inode = bch2_inode_hash_insert(c, NULL, inode); - bch2_trans_put(trans); err: posix_acl_release(default_acl); posix_acl_release(acl); @@ -634,7 +627,6 @@ err_trans: if (!(flags & BCH_CREATE_TMPFILE)) mutex_unlock(&dir->ei_update_lock); - bch2_trans_put(trans); make_bad_inode(&inode->v); iput(&inode->v); inode = ERR_PTR(ret); @@ -649,7 +641,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; subvol_inum inum = {}; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct qstr lookup_name; int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); @@ -701,7 +693,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, goto err; out: bch2_trans_iter_exit(trans, &dirent_iter); - printbuf_exit(&buf); return inode; err: inode = ERR_PTR(ret); @@ -770,8 +761,8 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_unpacked dir_u, inode_u; int ret; - mutex_lock(&inode->ei_update_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(mutex)(&inode->ei_update_lock); + CLASS(btree_trans, trans)(c); ret = commit_do(trans, NULL, NULL, 0, bch2_link_trans(trans, @@ -785,8 +776,6 @@ static int __bch2_link(struct bch_fs *c, bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); } - bch2_trans_put(trans); - mutex_unlock(&inode->ei_update_lock); return ret; } @@ -821,8 +810,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, int ret; bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); - - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, @@ -849,7 +837,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); err: - bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); return ret; @@ -918,7 +905,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, struct bch_inode_info 
*dst_inode = to_bch_ei(dst_dentry->d_inode); struct bch_inode_unpacked dst_dir_u, src_dir_u; struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u; - struct btree_trans *trans; enum bch_rename_mode mode = flags & RENAME_EXCHANGE ? BCH_RENAME_EXCHANGE : dst_dentry->d_inode @@ -942,7 +928,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, src_inode, dst_inode); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?: bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol); @@ -1028,8 +1014,6 @@ err_tx_restart: bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, ATTR_CTIME); err: - bch2_trans_put(trans); - bch2_fs_quota_transfer(c, src_inode, bch_qid(&src_inode->ei_inode), 1 << QTYP_PRJ, @@ -1097,7 +1081,6 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; - struct btree_trans *trans; struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; @@ -1105,7 +1088,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, kgid_t kgid; int ret; - mutex_lock(&inode->ei_update_lock); + guard(mutex)(&inode->ei_update_lock); qid = inode->ei_qid; @@ -1122,9 +1105,9 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, ret = bch2_fs_quota_transfer(c, inode, qid, ~0, KEY_TYPE_QUOTA_PREALLOC); if (ret) - goto err; + return ret; - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); kfree(acl); @@ -1153,18 +1136,13 @@ btree_err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) - goto err_trans; + return ret; bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); if (acl) set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -err_trans: - bch2_trans_put(trans); -err: - mutex_unlock(&inode->ei_update_lock); - - return bch2_err_class(ret); + return 0; } static int bch2_getattr(struct mnt_idmap *idmap, 
@@ -1228,18 +1206,16 @@ static int bch2_setattr(struct mnt_idmap *idmap, { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret; lockdep_assert_held(&inode->v.i_rwsem); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - setattr_prepare(idmap, dentry, iattr); - if (ret) - return ret; + int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + setattr_prepare(idmap, dentry, iattr) ?: + (iattr->ia_valid & ATTR_SIZE + ? bchfs_truncate(idmap, inode, iattr) + : bch2_setattr_nonsize(idmap, inode, iattr)); - return iattr->ia_valid & ATTR_SIZE - ? bchfs_truncate(idmap, inode, iattr) - : bch2_setattr_nonsize(idmap, inode, iattr); + return bch2_err_class(ret); } static int bch2_tmpfile(struct mnt_idmap *idmap, @@ -1487,7 +1463,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; struct bch_fiemap_extent cur, prev; int ret = 0; @@ -1505,7 +1480,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&prev.kbuf); bkey_init(&prev.kbuf.k->k); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); while (start < end) { ret = lockrestart_do(trans, @@ -1538,7 +1513,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, ret = bch2_fill_extent(c, info, &prev); } err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&cur.kbuf, c); bch2_bkey_buf_exit(&prev.kbuf, c); @@ -1968,7 +1942,6 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child struct bch_inode_info *inode = to_bch_ei(child->d_inode); struct bch_inode_info *dir = to_bch_ei(parent->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans *trans; struct btree_iter iter1; struct btree_iter iter2; struct bkey_s_c k; @@ -1983,8 +1956,7 @@ static int bch2_get_name(struct dentry *parent, char 
*name, struct dentry *child if (!S_ISDIR(dir->v.i_mode)) return -EINVAL; - trans = bch2_trans_get(c); - + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, POS(dir->ei_inode.bi_inum, 0), 0); bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, @@ -2063,8 +2035,6 @@ err: bch2_trans_iter_exit(trans, &iter1); bch2_trans_iter_exit(trans, &iter2); - bch2_trans_put(trans); - return ret; } @@ -2148,12 +2118,11 @@ static int bch2_vfs_write_inode(struct inode *vinode, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(vinode); - int ret; - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); + guard(mutex)(&inode->ei_update_lock); + + int ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); return bch2_err_class(ret); } @@ -2200,9 +2169,8 @@ static void bch2_evict_inode(struct inode *vinode) bch2_inode_hash_remove(c, inode); } - mutex_lock(&c->vfs_inodes_lock); - list_del_init(&inode->ei_vfs_inode_list); - mutex_unlock(&c->vfs_inodes_lock); + scoped_guard(mutex, &c->vfs_inodes_lock) + list_del_init(&inode->ei_vfs_inode_list); } void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) @@ -2352,16 +2320,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb, OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE); printbuf_nul_terminate(&buf); seq_printf(seq, ",%s", buf.buf); - int ret = buf.allocation_failure ? -ENOMEM : 0; - printbuf_exit(&buf); - return ret; + return buf.allocation_failure ? 
-ENOMEM : 0; } static void bch2_put_super(struct super_block *sb) @@ -2383,24 +2349,20 @@ static int bch2_freeze(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); bch2_fs_read_only(c); - up_write(&c->state_lock); return 0; } static int bch2_unfreeze(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; - int ret; if (test_bit(BCH_FS_emergency_ro, &c->flags)) return 0; - down_write(&c->state_lock); - ret = bch2_fs_read_write(c); - up_write(&c->state_lock); - return ret; + guard(rwsem_write)(&c->state_lock); + return bch2_fs_read_write(c); } static const struct super_operations bch_super_operations = { @@ -2671,7 +2633,7 @@ static int bch2_fs_reconfigure(struct fs_context *fc) opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0); if (opts->opts.read_only != c->opts.read_only) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); if (opts->opts.read_only) { bch2_fs_read_only(c); @@ -2681,22 +2643,18 @@ static int bch2_fs_reconfigure(struct fs_context *fc) ret = bch2_fs_read_write(c); if (ret) { bch_err(c, "error going rw: %i", ret); - up_write(&c->state_lock); - ret = -EINVAL; - goto err; + return -EINVAL; } sb->s_flags &= ~SB_RDONLY; } c->opts.read_only = opts->opts.read_only; - - up_write(&c->state_lock); } if (opt_defined(opts->opts, errors)) c->opts.errors = opts->opts.errors; -err: + return bch2_err_class(ret); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 471e93a3f00c..df0aa2522b18 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -53,10 +53,9 @@ static int dirent_points_to_inode(struct bch_fs *c, { int ret = dirent_points_to_inode_nowarn(c, dirent, inode); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); dirent_inode_mismatch_msg(&buf, c, dirent, inode); bch_warn(c, "%s", buf.buf); - printbuf_exit(&buf); } return ret; } @@ -253,14 +252,13 @@ create_lostfound: * XXX: we could have a nicer log message here 
if we had a nice way to * walk backpointers to print a path */ - struct printbuf path = PRINTBUF; + CLASS(printbuf, path)(); ret = bch2_inum_to_path(trans, root_inum, &path); if (ret) goto err; bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", path.buf, root_inum.subvol, snapshot); - printbuf_exit(&path); u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); @@ -455,7 +453,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * * whiteouts for the dirent we just created. */ if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) { - snapshot_id_list whiteouts_done; + CLASS(snapshot_id_list, whiteouts_done)(); struct btree_iter iter; struct bkey_s_c k; @@ -499,7 +497,6 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * break; } } - darray_exit(&whiteouts_done); bch2_trans_iter_exit(trans, &iter); } @@ -683,11 +680,15 @@ static inline void snapshots_seen_exit(struct snapshots_seen *s) darray_exit(&s->ids); } -static inline void snapshots_seen_init(struct snapshots_seen *s) +static inline struct snapshots_seen snapshots_seen_init(void) { - memset(s, 0, sizeof(*s)); + return (struct snapshots_seen) {}; } +DEFINE_CLASS(snapshots_seen, struct snapshots_seen, + snapshots_seen_exit(&_T), + snapshots_seen_init(), void) + static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) { u32 *i; @@ -815,9 +816,13 @@ static void inode_walker_exit(struct inode_walker *w) static struct inode_walker inode_walker_init(void) { - return (struct inode_walker) { 0, }; + return (struct inode_walker) {}; } +DEFINE_CLASS(inode_walker, struct inode_walker, + inode_walker_exit(&_T), + inode_walker_init(), void) + static int add_inode(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c inode) { @@ -917,7 +922,7 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str if (!i) return NULL; - struct printbuf buf = PRINTBUF; + 
CLASS(printbuf, buf)(); int ret = 0; if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot, @@ -967,10 +972,8 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str goto fsck_err; } - printbuf_exit(&buf); return i; fsck_err: - printbuf_exit(&buf); return ERR_PTR(ret); } @@ -1004,27 +1007,25 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans, return 0; struct bkey_i_dirent *d = bkey_i_to_dirent(new); - struct inode_walker target = inode_walker_init(); - int ret = 0; + CLASS(inode_walker, target)(); if (d->v.d_type == DT_SUBVOL) { bch_err(trans->c, "%s does not support DT_SUBVOL", __func__); - ret = -BCH_ERR_fsck_repair_unimplemented; + return bch_err_throw(trans->c, fsck_repair_unimplemented); } else { - ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); + int ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); if (ret) - goto err; + return ret; darray_for_each(target.inodes, i) { i->inode.bi_dir_offset = d->k.p.offset; ret = __bch2_fsck_write_inode(trans, &i->inode); if (ret) - goto err; + return ret; } + + return 0; } -err: - inode_walker_exit(&target); - return ret; } static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, @@ -1056,7 +1057,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, bool *write_inode) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); u32 inode_snapshot = inode->bi_snapshot; struct btree_iter dirent_iter = {}; @@ -1106,7 +1107,6 @@ out: ret = 0; fsck_err: bch2_trans_iter_exit(trans, &dirent_iter); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -1118,7 +1118,7 @@ static int check_inode(struct btree_trans *trans, struct snapshots_seen *s) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1234,7 +1234,7 @@ static int check_inode(struct btree_trans *trans, */ ret = 
check_inode_deleted_list(trans, k.k->p); if (ret < 0) - goto err_noprint; + return ret; fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1255,7 +1255,7 @@ static int check_inode(struct btree_trans *trans, u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - goto err_noprint; + return ret; } ret = 0; } @@ -1316,33 +1316,26 @@ do_update: ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - goto err_noprint; + return ret; } err: fsck_err: bch_err_fn(c, ret); -err_noprint: - printbuf_exit(&buf); return ret; } int bch2_check_inodes(struct bch_fs *c) { struct bch_inode_unpacked snapshot_root = {}; - struct snapshots_seen s; - snapshots_seen_init(&s); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &snapshot_root, &s))); - - snapshots_seen_exit(&s); - bch_err_fn(c, ret); - return ret; + check_inode(trans, &iter, k, &snapshot_root, &s)); } static int find_oldest_inode_needs_reattach(struct btree_trans *trans, @@ -1390,7 +1383,7 @@ static int check_unreachable_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; if (!bkey_is_inode(k.k)) @@ -1414,7 +1407,6 @@ static int check_unreachable_inode(struct btree_trans *trans, buf.buf))) ret = reattach_inode(trans, &inode); fsck_err: - printbuf_exit(&buf); return ret; } @@ -1430,14 +1422,12 @@ fsck_err: */ int bch2_check_unreachable_inodes(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + CLASS(btree_trans, trans)(c); + return 
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_unreachable_inode(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_unreachable_inode(trans, &iter, k)); } static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) @@ -1461,7 +1451,7 @@ static int check_key_has_inode(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter iter2 = {}; int ret = PTR_ERR_OR_ZERO(i); if (ret) @@ -1557,7 +1547,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &iter2); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; delete: @@ -1627,23 +1616,28 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal if (i->inode.bi_sectors == i->count) continue; + CLASS(printbuf, buf)(); + lockrestart_do(trans, + bch2_inum_snapshot_to_path(trans, + i->inode.bi_inum, + i->inode.bi_snapshot, NULL, &buf)); + count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot); if (w->recalculate_sums) i->count = count2; if (i->count != count2) { - bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, i->count, count2); + bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s", + i->count, count2, buf.buf); i->count = count2; } if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && i->inode.bi_sectors != i->count, trans, inode_i_sectors_wrong, - "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, - i->inode.bi_sectors, i->count)) { + "incorrect i_sectors: got %llu, should be %llu\n%s", + i->inode.bi_sectors, i->count, buf.buf)) { i->inode.bi_sectors = i->count; ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) @@ -1686,11 +1680,15 
@@ static void extent_ends_exit(struct extent_ends *extent_ends) darray_exit(&extent_ends->e); } -static void extent_ends_init(struct extent_ends *extent_ends) +static struct extent_ends extent_ends_init(void) { - memset(extent_ends, 0, sizeof(*extent_ends)); + return (struct extent_ends) {}; } +DEFINE_CLASS(extent_ends, struct extent_ends, + extent_ends_exit(&_T), + extent_ends_init(), void) + static int extent_ends_at(struct bch_fs *c, struct extent_ends *extent_ends, struct snapshots_seen *seen, @@ -1730,7 +1728,7 @@ static int overlapping_extents_found(struct btree_trans *trans, struct extent_end *extent_end) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter iter1, iter2 = {}; struct bkey_s_c k1, k2; int ret; @@ -1836,7 +1834,6 @@ fsck_err: err: bch2_trans_iter_exit(trans, &iter2); bch2_trans_iter_exit(trans, &iter1); - printbuf_exit(&buf); return ret; } @@ -1893,11 +1890,10 @@ static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *it bkey_for_each_crc(k.k, ptrs, crc, i) if (crc_is_encoded(crc) && crc.uncompressed_size > encoded_extent_max_sectors) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); - printbuf_exit(&buf); } return 0; @@ -1911,7 +1907,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct disk_reservation *res) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_key_has_snapshot(trans, iter, k); @@ -2004,7 +2000,6 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, out: err: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -2015,49 +2010,41 @@ fsck_err: */ int bch2_check_extents(struct bch_fs *c) { - struct inode_walker w = inode_walker_init(); - struct snapshots_seen s; - struct extent_ends 
extent_ends; struct disk_reservation res = { 0 }; - snapshots_seen_init(&s); - extent_ends_init(&extent_ends); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); + CLASS(inode_walker, w)(); + CLASS(extent_ends, extent_ends)(); - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_extents, + int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: check_extent_overbig(trans, &iter, k); })) ?: - check_i_sectors_notnested(trans, &w)); + check_i_sectors_notnested(trans, &w); bch2_disk_reservation_put(c, &res); - extent_ends_exit(&extent_ends); - inode_walker_exit(&w); - snapshots_seen_exit(&s); - - bch_err_fn(c, ret); return ret; } int bch2_check_indirect_extents(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); struct disk_reservation res = { 0 }; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent_overbig(trans, &iter, k); - }))); + })); bch2_disk_reservation_put(c, &res); - bch_err_fn(c, ret); return ret; } @@ -2150,7 +2137,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u32 parent_snapshot; u32 new_parent_subvol = 0; u64 parent_inum; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); @@ -2274,7 +2261,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &subvol_iter); - printbuf_exit(&buf); return ret; } @@ -2288,39 +2274,37 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, { struct bch_fs *c = trans->c; struct inode_walker_entry *i; - struct printbuf buf = PRINTBUF; + 
CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_key_has_snapshot(trans, iter, k); - if (ret) { - ret = ret < 0 ? ret : 0; - goto out; - } + if (ret) + return ret < 0 ? ret : 0; ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) - goto err; + return ret; if (k.k->type == KEY_TYPE_whiteout) - goto out; + return 0; if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { ret = check_subdir_dirents_count(trans, dir); if (ret) - goto err; + return ret; } i = walk_inode(trans, dir, k); ret = PTR_ERR_OR_ZERO(i); - if (ret < 0) - goto err; + if (ret) + return ret; ret = check_key_has_inode(trans, iter, dir, i, k); if (ret) - goto err; + return ret; if (!i || i->whiteout) - goto out; + return 0; if (dir->first_this_inode) *hash_info = bch2_hash_info_init(c, &i->inode); @@ -2331,15 +2315,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k, need_second_pass); if (ret < 0) - goto err; - if (ret) { - /* dirent has been deleted */ - ret = 0; - goto out; - } - + return ret; + if (ret) + return 0; /* dirent has been deleted */ if (k.k->type != KEY_TYPE_dirent) - goto out; + return 0; struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); @@ -2364,13 +2344,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, d.v->d_type, &name, NULL, target); ret = PTR_ERR_OR_ZERO(new_d); if (ret) - goto out; + return ret; new_d->k.p.inode = d.k->p.inode; new_d->k.p.snapshot = d.k->p.snapshot; struct btree_iter dup_iter = {}; - ret = bch2_hash_delete_at(trans, + return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, hash_info, iter, BTREE_UPDATE_internal_snapshot_node) ?: bch2_str_hash_repair_key(trans, s, @@ -2378,17 +2358,16 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, iter, bkey_i_to_s_c(&new_d->k_i), &dup_iter, bkey_s_c_null, need_second_pass); - goto out; } if (d.v->d_type == DT_SUBVOL) { ret = 
check_dirent_to_subvol(trans, iter, d); if (ret) - goto err; + return ret; } else { ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); if (ret) - goto err; + return ret; if (!target->inodes.nr) { ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum), @@ -2405,13 +2384,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, buf.buf))) { ret = bch2_fsck_remove_dirent(trans, d.k->p); if (ret) - goto err; + return ret; } darray_for_each(target->inodes, i) { ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true); if (ret) - goto err; + return ret; } darray_for_each(target->deletes, i) @@ -2434,24 +2413,27 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &delete_iter); if (ret) - goto err; + return ret; } } + /* + * Cannot access key values after doing a transaction commit without + * revalidating: + */ + bool have_dir = d.v->d_type == DT_DIR; + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) - goto err; + return ret; for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { - if (d.v->d_type == DT_DIR) + if (have_dir) i->count++; i->i_size += bkey_bytes(d.k); } -out: -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -2461,23 +2443,21 @@ fsck_err: */ int bch2_check_dirents(struct bch_fs *c) { - struct inode_walker dir = inode_walker_init(); - struct inode_walker target = inode_walker_init(); - struct snapshots_seen s; struct bch_hash_info hash_info; + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); + CLASS(inode_walker, dir)(); + CLASS(inode_walker, target)(); bool need_second_pass = false, did_second_pass = false; int ret; - - snapshots_seen_init(&s); again: - ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), 
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s, &need_second_pass)) ?: - check_subdir_count_notnested(trans, &dir)); + check_subdir_count_notnested(trans, &dir); if (!ret && need_second_pass && !did_second_pass) { bch_info(c, "check_dirents requires second pass"); @@ -2490,10 +2470,6 @@ again: ret = -EINVAL; } - snapshots_seen_exit(&s); - inode_walker_exit(&dir); - inode_walker_exit(&target); - bch_err_fn(c, ret); return ret; } @@ -2536,21 +2512,17 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, */ int bch2_check_xattrs(struct bch_fs *c) { - struct inode_walker inode = inode_walker_init(); struct bch_hash_info hash_info; - int ret = 0; + CLASS(btree_trans, trans)(c); + CLASS(inode_walker, inode)(); - ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_xattr(trans, &iter, k, &hash_info, &inode))); - - inode_walker_exit(&inode); - bch_err_fn(c, ret); + check_xattr(trans, &iter, k, &hash_info, &inode)); return ret; } @@ -2615,18 +2587,17 @@ fsck_err: /* Get root directory, create if it doesn't exist: */ int bch2_check_root(struct bch_fs *c) { - int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_root_trans(trans)); - bch_err_fn(c, ret); - return ret; + CLASS(btree_trans, trans)(c); + return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_root_trans(trans)); } static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; struct btree_iter parent_iter = {}; - darray_u32 subvol_path = {}; - struct printbuf buf = PRINTBUF; + CLASS(darray_u32, subvol_path)(); + CLASS(printbuf, buf)(); int ret = 0; if (k.k->type 
!= KEY_TYPE_subvolume) @@ -2686,21 +2657,17 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, } fsck_err: err: - printbuf_exit(&buf); - darray_exit(&subvol_path); bch2_trans_iter_exit(trans, &parent_iter); return ret; } int bch2_check_subvolume_structure(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol_path(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_subvol_path(trans, &iter, k)); } static int bch2_bi_depth_renumber_one(struct btree_trans *trans, @@ -2751,8 +2718,8 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; struct btree_iter inode_iter = {}; - darray_u64 path = {}; - struct printbuf buf = PRINTBUF; + CLASS(darray_u64, path)(); + CLASS(printbuf, buf)(); u32 snapshot = inode_k.k->p.snapshot; bool redo_bi_depth = false; u32 min_bi_depth = U32_MAX; @@ -2858,8 +2825,6 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); - darray_exit(&path); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -2870,8 +2835,8 @@ fsck_err: */ int bch2_check_directory_structure(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, @@ -2883,10 +2848,7 @@ int bch2_check_directory_structure(struct bch_fs *c) continue; check_path_loop(trans, k); - }))); - - bch_err_fn(c, ret); - return ret; + })); } struct nlink_table { @@ -2970,8 +2932,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct 
nlink_table *t, u64 start, u64 *end) { - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_inodes, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start), BTREE_ITER_intent| BTREE_ITER_prefetch| @@ -3006,7 +2968,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, break; } 0; - }))); + })); bch_err_fn(c, ret); return ret; @@ -3016,12 +2978,10 @@ noinline_for_stack static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct snapshots_seen s; - - snapshots_seen_init(&s); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, + int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, ({ @@ -3038,9 +2998,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links le64_to_cpu(d.v->d_inum), d.k->p.snapshot); } 0; - }))); - - snapshots_seen_exit(&s); + })); bch_err_fn(c, ret); return ret; @@ -3094,14 +3052,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { + CLASS(btree_trans, trans)(c); size_t idx = 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); + check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)); if (ret < 0) { bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); return ret; @@ -3140,7 +3098,6 @@ int bch2_check_nlinks(struct bch_fs *c) } while (next_iter_range_start != U64_MAX); kvfree(links.d); - bch_err_fn(c, 
ret); return ret; } @@ -3175,15 +3132,13 @@ int bch2_fix_reflink_p(struct bch_fs *c) if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) return 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_extents, POS_MIN, BTREE_ITER_intent|BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - fix_reflink_p_key(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + fix_reflink_p_key(trans, &iter, k)); } #ifndef NO_BCACHEFS_CHARDEV @@ -3209,6 +3164,8 @@ static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) if (ret) return ret; + thr->c->recovery_task = current; + ret = bch2_fs_start(thr->c); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 307fb0c95656..4a9725f30c4f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -417,7 +417,8 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, struct bch_inode_unpacked *inode) { - return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); + CLASS(btree_trans, trans)(c); + return lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, inode)); } int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum, @@ -1132,7 +1133,7 @@ err: int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter = {}; struct bkey_s_c k; struct bch_inode_unpacked inode; @@ -1141,7 +1142,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode)); if (ret) - goto err2; + return ret; /* * If this was a directory, there shouldn't be any real dirents left - @@ -1156,7 +1157,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) : bch2_inode_delete_keys(trans, 
inum, BTREE_ID_dirents)) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs); if (ret) - goto err2; + return ret; retry: bch2_trans_begin(trans); @@ -1188,12 +1189,9 @@ err: goto retry; if (ret) - goto err2; + return ret; - ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); -err2: - bch2_trans_put(trans); - return ret; + return delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); } int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) @@ -1413,7 +1411,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, struct bch_fs *c = trans->c; struct btree_iter inode_iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached); @@ -1506,7 +1504,6 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); - printbuf_exit(&buf); return ret; delete: ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); @@ -1524,29 +1521,23 @@ static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum, int bch2_delete_dead_inodes(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - int ret; - + CLASS(btree_trans, trans)(c); /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write buffer will be out of sync; therefore we * alway need a write buffer flush - */ - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - - /* + * * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, * but we can't retry because the btree write buffer won't have been * flushed and we'd spin: */ - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + return bch2_btree_write_buffer_flush_sync(trans) ?: + 
for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ struct bch_inode_unpacked inode; - ret = may_delete_deleted_inode(trans, k.k->p, &inode, true); + int ret = may_delete_deleted_inode(trans, k.k->p, &inode, true); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); @@ -1567,8 +1558,4 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret; })); -err: - bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 07023667a475..5d6681c070ba 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -114,12 +114,11 @@ err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9)); prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } err_noprint: bch2_open_buckets_put(c, &open_buckets); @@ -222,23 +221,18 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - int ret; + CLASS(btree_trans, trans)(c); + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); - ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); + int ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - ret = 0; - return ret; + return bch2_err_matches(ret, BCH_ERR_transaction_restart) ? 
0 : ret; } /* truncate: */ @@ -319,17 +313,13 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ - down_read(&c->snapshot_create_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(rwsem_read)(&c->snapshot_create_lock); + CLASS(btree_trans, trans)(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) - goto out; + return ret; ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; -out: - bch2_trans_put(trans); - up_read(&c->snapshot_create_lock); - return ret; } @@ -555,16 +545,12 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ - down_read(&c->snapshot_create_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(rwsem_read)(&c->snapshot_create_lock); + CLASS(btree_trans, trans)(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) - goto out; + return ret; ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; -out: - bch2_trans_put(trans); - up_read(&c->snapshot_create_lock); - return ret; } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 210b6adc359f..b8ccd8c930e1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, ""); #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now) +{ + s64 congested = atomic_read(&ca->congested); + u64 last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + return clamp(congested, 0LL, CONGESTED_MAX); +} + static bool bch2_target_congested(struct bch_fs *c, u16 target) { const 
struct bch_devs_mask *devs; unsigned d, nr = 0, total = 0; - u64 now = local_clock(), last; - s64 congested; - struct bch_dev *ca; - - if (!target) - return false; + u64 now = local_clock(); guard(rcu)(); devs = bch2_target_to_mask(c, target) ?: &c->rw_devs[BCH_DATA_user]; for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { - ca = rcu_dereference(c->devs[d]); + struct bch_dev *ca = rcu_dereference(c->devs[d]); if (!ca) continue; - congested = atomic_read(&ca->congested); - last = READ_ONCE(ca->congested_last); - if (time_after64(now, last)) - congested -= (now - last) >> 12; - - total += max(congested, 0LL); + total += bch2_dev_congested_read(ca, now); nr++; } return get_random_u32_below(nr * CONGESTED_MAX) < total; } +void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca) +{ + printbuf_tabstop_push(out, 32); + + prt_printf(out, "current:\t%u%%\n", + bch2_dev_congested_read(ca, local_clock()) * + 100 / CONGESTED_MAX); + + prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX); + + prt_printf(out, "last io over threshold:\t"); + bch2_pr_time_units(out, local_clock() - ca->congested_last); + prt_newline(out); + + prt_printf(out, "read latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2); + prt_newline(out); + + prt_printf(out, "median read latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); + + prt_printf(out, "write latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3); + prt_newline(out); + + prt_printf(out, "median write latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); +} + #else static bool bch2_target_congested(struct bch_fs *c, u16 target) @@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, if (!have_io_error(failed)) 
{ BUG_ON(!opts.promote_target); - if (!(flags & BCH_READ_may_promote)) + if (!(flags & BCH_READ_may_promote)) { + count_event(c, io_read_nopromote_may_not); return bch_err_throw(c, nopromote_may_not); + } - if (bch2_bkey_has_target(c, k, opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) { + count_event(c, io_read_nopromote_already_promoted); return bch_err_throw(c, nopromote_already_promoted); + } - if (bkey_extent_is_unwritten(k)) + if (bkey_extent_is_unwritten(k)) { + count_event(c, io_read_nopromote_unwritten); return bch_err_throw(c, nopromote_unwritten); + } - if (bch2_target_congested(c, opts.promote_target)) + if (bch2_target_congested(c, opts.promote_target)) { + count_event(c, io_read_nopromote_congested); return bch_err_throw(c, nopromote_congested); + } } if (rhashtable_lookup_fast(&c->promote_table, &pos, - bch_promote_params)) + bch_promote_params)) { + count_event(c, io_read_nopromote_in_flight); return bch_err_throw(c, nopromote_in_flight); + } return 0; } @@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio) BUG_ON(ret); async_object_list_del(c, promote, op->list_idx); + async_object_list_del(c, rbio, rbio->list_idx); bch2_data_update_exit(&op->write); @@ -343,16 +389,27 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, return promote; nopromote: - trace_io_read_nopromote(c, ret); + if (trace_io_read_nopromote_enabled()) { + CLASS(printbuf, buf)(); + printbuf_indent_add_nextline(&buf, 2); + prt_printf(&buf, "%s\n", bch2_err_str(ret)); + bch2_bkey_val_to_text(&buf, c, k); + + trace_io_read_nopromote(c, buf.buf); + } + count_event(c, io_read_nopromote); + return NULL; } -void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +void bch2_promote_op_to_text(struct printbuf *out, + struct bch_fs *c, + struct promote_op *op) { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); printbuf_indent_add(out, 2); - 
bch2_read_bio_to_text(out, op->write.rbio.parent); + bch2_read_bio_to_text(out, c, op->write.rbio.parent); printbuf_indent_sub(out, 2); } @@ -380,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, struct bch_read_bio *rbio, struct bpos read_pos) { - bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); + CLASS(btree_trans, trans)(c); + bch2_read_err_msg_trans(trans, out, rbio, read_pos); } enum rbio_context { @@ -450,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + if (rbio->list_idx) + async_object_list_del(rbio->c, rbio, rbio->list_idx); +#endif bio_endio(&rbio->bio); } @@ -577,7 +639,7 @@ static void bch2_rbio_retry(struct work_struct *work) }; struct bch_io_failures failed = { .nr = 0 }; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf sk; bch2_bkey_buf_init(&sk); @@ -619,7 +681,7 @@ static void bch2_rbio_retry(struct work_struct *work) } if (failed.nr || ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); lockrestart_do(trans, @@ -647,12 +709,10 @@ static void bch2_rbio_retry(struct work_struct *work) bch2_io_failures_to_text(&buf, c, &failed); bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } bch2_rbio_done(rbio); bch2_bkey_buf_exit(&sk, c); - bch2_trans_put(trans); } static void bch2_rbio_error(struct bch_read_bio *rbio, @@ -740,8 +800,9 @@ out: static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + CLASS(btree_trans, trans)(rbio->c); + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, 
rbio)); } static void bch2_read_decompress_err(struct work_struct *work) @@ -749,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decompression error"); @@ -761,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR); - printbuf_exit(&buf); } static void bch2_read_decrypt_err(struct work_struct *work) @@ -769,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decrypt error"); @@ -781,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR); - printbuf_exit(&buf); } /* Inner part that may run in process context */ @@ -963,7 +1022,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, @@ -1000,7 +1059,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, bch2_fs_inconsistent(c, "%s", buf.buf); bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, @@ -1052,25 +1110,22 @@ retry_pick: trace_and_count(c, io_read_fail_and_poison, &orig->bio); } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); 
bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20_key_set) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); bch2_bkey_val_to_text(&buf, c, k); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, data_read_no_encryption_key); goto err; } @@ -1439,13 +1494,12 @@ err: if (unlikely(ret)) { if (ret != -BCH_ERR_extent_poisoned) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9)); prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } rbio->bio.bi_status = BLK_STS_IOERR; @@ -1467,19 +1521,34 @@ static const char * const bch2_read_bio_flags[] = { NULL }; -void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) +void bch2_read_bio_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_read_bio *rbio) { + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + bch2_read_err_msg(c, out, rbio, rbio->read_pos); + prt_newline(out); + + /* Are we in a retry? */ + + printbuf_indent_add(out, 2); + u64 now = local_clock(); - prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0); - prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? 
now - rbio->submit_time : 0); + prt_printf(out, "start_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time)); + prt_newline(out); + + prt_printf(out, "submit_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time)); + prt_newline(out); if (!rbio->split) prt_printf(out, "end_io:\t%ps\n", rbio->end_io); else prt_printf(out, "parent:\t%px\n", rbio->parent); - prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io); - prt_printf(out, "promote:\t%u\n", rbio->promote); prt_printf(out, "bounce:\t%u\n", rbio->bounce); prt_printf(out, "split:\t%u\n", rbio->split); @@ -1498,6 +1567,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_newline(out); bch2_bio_to_text(out, &rbio->bio); + printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index cfc8ef35b14d..9d63d5914b20 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -7,6 +7,10 @@ #include "extents_types.h" #include "reflink.h" +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *); +#endif + struct bch_read_bio { struct bch_fs *c; u64 start_time; @@ -161,11 +165,11 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, rbio->subvol = inum.subvol; - bch2_trans_run(c, - __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, - BCH_READ_retry_if_stale| - BCH_READ_may_promote| - BCH_READ_user_mapped)); + CLASS(btree_trans, trans)(c); + __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, + BCH_READ_retry_if_stale| + BCH_READ_may_promote| + BCH_READ_user_mapped); } static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, @@ -207,8 +211,8 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio, } struct promote_op; -void bch2_promote_op_to_text(struct printbuf *, struct promote_op *); -void bch2_read_bio_to_text(struct printbuf *, struct 
bch_read_bio *); +void bch2_promote_op_to_text(struct printbuf *, struct bch_fs *, struct promote_op *); +void bch2_read_bio_to_text(struct printbuf *, struct bch_fs *, struct bch_read_bio *); void bch2_fs_io_read_exit(struct bch_fs *); int bch2_fs_io_read_init(struct bch_fs *); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 88b1eec8eff3..d7620138e038 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -32,6 +32,7 @@ #include "trace.h" #include <linux/blkdev.h> +#include <linux/moduleparam.h> #include <linux/prefetch.h> #include <linux/random.h> #include <linux/sched/mm.h> @@ -54,14 +55,9 @@ static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, s64 latency_over = io_latency - latency_threshold; if (latency_threshold && latency_over > 0) { - /* - * bump up congested by approximately latency_over * 4 / - * latency_threshold - we don't need much accuracy here so don't - * bother with the divide: - */ if (atomic_read(&ca->congested) < CONGESTED_MAX) - atomic_add(latency_over >> - max_t(int, ilog2(latency_threshold) - 2, 0), + atomic_add((u32) min(U32_MAX, io_latency * 2) / + (u32) min(U32_MAX, latency_threshold), &ca->congested); ca->congested_last = now; @@ -260,7 +256,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); if (unlikely(bi_sectors + i_sectors_delta < 0)) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", extent_iter->pos.inode, bi_sectors, i_sectors_delta); @@ -268,7 +264,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (i_sectors_delta < 0) i_sectors_delta = -bi_sectors; @@ -374,7 +369,6 @@ static int 
bch2_write_index_default(struct bch_write_op *op) struct bkey_buf sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; subvol_inum inum = { .subvol = op->subvol, @@ -384,6 +378,7 @@ static int bch2_write_index_default(struct bch_write_op *op) BUG_ON(!inum.subvol); + CLASS(btree_trans, trans)(c); bch2_bkey_buf_init(&sk); do { @@ -420,7 +415,6 @@ static int bch2_write_index_default(struct bch_write_op *op) bch2_cut_front(iter.pos, k); } while (!bch2_keylist_empty(keys)); - bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return ret; @@ -430,7 +424,7 @@ static int bch2_write_index_default(struct bch_write_op *op) void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (op->subvol) { bch2_inum_offset_err_msg(op->c, &buf, @@ -457,7 +451,6 @@ void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, . } bch_err_ratelimited(op->c, "%s", buf.buf); - printbuf_exit(&buf); } void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, @@ -469,8 +462,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, struct bch_write_bio *n; unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE; unsigned ref_idx = type == BCH_DATA_btree - ? BCH_DEV_READ_REF_btree_node_write - : BCH_DEV_WRITE_REF_io_write; + ? 
(unsigned) BCH_DEV_READ_REF_btree_node_write + : (unsigned) BCH_DEV_WRITE_REF_io_write; BUG_ON(c->opts.nochanges); @@ -1222,6 +1215,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, struct btree_iter *iter, + struct bch_write_op *op, struct bkey_i *orig, struct bkey_s_c k, u64 new_i_size) @@ -1231,11 +1225,13 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, return 0; } - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); + struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, + bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); int ret = PTR_ERR_OR_ZERO(new); if (ret) return ret; + bkey_reassemble(new, k); bch2_cut_front(bkey_start_pos(&orig->k), new); bch2_cut_back(orig->k.p, new); @@ -1243,6 +1239,8 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; + bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); + /* * Note that we're not calling bch2_subvol_get_snapshot() in this path - * that was done when we kicked off the write, and here it's important @@ -1267,7 +1265,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_intent, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); + bch2_nocow_write_convert_one_unwritten(trans, &iter, op, orig, k, op->new_i_size); })); if (ret) break; @@ -1476,7 +1474,7 @@ err_bucket_stale: break; } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (bch2_fs_inconsistent_on(stale < 0, c, "pointer to invalid bucket in nocow path on device %llu\n %s", stale_at->b.inode, @@ -1486,7 +1484,6 @@ err_bucket_stale: /* We can retry this: */ ret = bch_err_throw(c, transaction_restart); } - printbuf_exit(&buf); goto err_get_ioref; } @@ -1530,7 +1527,7 @@ again: * freeing up 
space on specific disks, which means that * allocations for specific disks may hang arbitrarily long: */ - ret = bch2_trans_run(c, lockrestart_do(trans, + ret = bch2_trans_do(c, bch2_alloc_sectors_start_trans(trans, op->target, op->opts.erasure_code && !(op->flags & BCH_WRITE_cached), @@ -1540,7 +1537,7 @@ again: op->nr_replicas_required, op->watermark, op->flags, - &op->cl, &wp))); + &op->cl, &wp)); if (unlikely(ret)) { if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) break; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index f22b05e02c1e..de03e20f6e30 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -88,7 +88,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) { lockdep_assert_held(&j->lock); - out->atomic++; + guard(printbuf_atomic)(out); if (!out->nr_tabstops) printbuf_tabstop_push(out, 24); @@ -98,8 +98,6 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) seq++) bch2_journal_buf_to_text(out, j, seq); prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? 
"open" : "closed"); - - --out->atomic; } static inline struct journal_buf * @@ -140,9 +138,9 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) { struct bch_fs *c = container_of(j, struct bch_fs, journal); bool stuck = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); - buf.atomic++; + guard(printbuf_atomic)(&buf); if (!(error == -BCH_ERR_journal_full || error == -BCH_ERR_journal_pin_full) || @@ -150,28 +148,24 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) return stuck; - spin_lock(&j->lock); + scoped_guard(spinlock, &j->lock) { + if (j->can_discard) + return stuck; - if (j->can_discard) { - spin_unlock(&j->lock); - return stuck; - } + stuck = true; - stuck = true; + /* + * The journal shutdown path will set ->err_seq, but do it here first to + * serialize against concurrent failures and avoid duplicate error + * reports. + */ + if (j->err_seq) + return stuck; - /* - * The journal shutdown path will set ->err_seq, but do it here first to - * serialize against concurrent failures and avoid duplicate error - * reports. - */ - if (j->err_seq) { - spin_unlock(&j->lock); - return stuck; - } - j->err_seq = journal_cur_seq(j); + j->err_seq = journal_cur_seq(j); - __bch2_journal_debug_to_text(&buf, j); - spin_unlock(&j->lock); + __bch2_journal_debug_to_text(&buf, j); + } prt_printf(&buf, bch2_fmt(c, "Journal stuck! 
Hava a pre-reservation but journal full (error %s)"), bch2_err_str(error)); bch2_print_str(c, KERN_ERR, buf.buf); @@ -179,7 +173,6 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) printbuf_reset(&buf); bch2_journal_pins_to_text(&buf, j); bch_err(c, "Journal pins:\n%s", buf.buf); - printbuf_exit(&buf); bch2_fatal_error(c); dump_stack(); @@ -269,22 +262,21 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t buf->data->u64s = cpu_to_le32(old.cur_entry_offset); if (trace_journal_entry_close_enabled() && trace) { - struct printbuf pbuf = PRINTBUF; - pbuf.atomic++; - - prt_str(&pbuf, "entry size: "); - prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); - prt_newline(&pbuf); - bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT); - trace_journal_entry_close(c, pbuf.buf); - printbuf_exit(&pbuf); + CLASS(printbuf, err)(); + guard(printbuf_atomic)(&err); + + prt_str(&err, "entry size: "); + prt_human_readable_u64(&err, vstruct_bytes(buf->data)); + prt_newline(&err); + bch2_prt_task_backtrace(&err, current, 1, GFP_NOWAIT); + trace_journal_entry_close(c, err.buf); } sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; if (unlikely(sectors > buf->sectors)) { - struct printbuf err = PRINTBUF; - err.atomic++; + CLASS(printbuf, err)(); + guard(printbuf_atomic)(&err); prt_printf(&err, "journal entry overran reserved space: %u > %u\n", sectors, buf->sectors); @@ -296,7 +288,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t bch2_journal_halt_locked(j); bch_err(c, "%s", err.buf); - printbuf_exit(&err); return; } @@ -344,9 +335,8 @@ void bch2_journal_halt_locked(struct journal *j) void bch2_journal_halt(struct journal *j) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_halt_locked(j); - spin_unlock(&j->lock); } static bool journal_entry_want_write(struct journal *j) @@ -371,13 +361,8 @@ static bool 
journal_entry_want_write(struct journal *j) bool bch2_journal_entry_close(struct journal *j) { - bool ret; - - spin_lock(&j->lock); - ret = journal_entry_want_write(j); - spin_unlock(&j->lock); - - return ret; + guard(spinlock)(&j->lock); + return journal_entry_want_write(j); } /* @@ -394,7 +379,7 @@ static int journal_entry_open(struct journal *j) lockdep_assert_held(&j->lock); BUG_ON(journal_entry_is_open(j)); - BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + BUG_ON(c->sb.clean); if (j->blocked) return bch_err_throw(c, journal_blocked); @@ -542,7 +527,7 @@ static void journal_write_work(struct work_struct *work) { struct journal *j = container_of(work, struct journal, write_work.work); - spin_lock(&j->lock); + guard(spinlock)(&j->lock); if (__journal_entry_is_open(j->reservations)) { long delta = journal_cur_buf(j)->expires - jiffies; @@ -551,7 +536,6 @@ static void journal_write_work(struct work_struct *work) else __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); } - spin_unlock(&j->lock); } static void journal_buf_prealloc(struct journal *j) @@ -652,34 +636,32 @@ out: if (ret == -BCH_ERR_journal_max_in_flight && track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) && trace_journal_entry_full_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_printbuf_make_room(&buf, 4096); - spin_lock(&j->lock); - prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); - bch2_journal_bufs_to_text(&buf, j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); + bch2_journal_bufs_to_text(&buf, j); + } trace_journal_entry_full(c, buf.buf); - printbuf_exit(&buf); count_event(c, journal_entry_full); } if (ret == -BCH_ERR_journal_max_open && track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) && trace_journal_entry_full_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_printbuf_make_room(&buf, 4096); - spin_lock(&j->lock); - 
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); - bch2_journal_bufs_to_text(&buf, j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); + bch2_journal_bufs_to_text(&buf, j); + } trace_journal_entry_full(c, buf.buf); - printbuf_exit(&buf); count_event(c, journal_entry_full); } @@ -751,11 +733,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, remaining_wait)) return ret; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_journal_debug_to_text(&buf, j); bch2_print_str(c, KERN_ERR, buf.buf); prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); - printbuf_exit(&buf); closure_wait_event(&j->async_wait, !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || @@ -772,11 +753,13 @@ void bch2_journal_entry_res_resize(struct journal *j, union journal_res_state state; int d = new_u64s - res->u64s; - spin_lock(&j->lock); + guard(spinlock)(&j->lock); + + j->entry_u64s_reserved += d; + res->u64s += d; - j->entry_u64s_reserved += d; if (d <= 0) - goto out; + return; j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); state = READ_ONCE(j->reservations); @@ -791,9 +774,6 @@ void bch2_journal_entry_res_resize(struct journal *j, } else { journal_cur_buf(j)->u64s_reserved += d; } -out: - spin_unlock(&j->lock); - res->u64s += d; } /* journal flushing: */ @@ -944,7 +924,6 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) { struct bch_fs *c = container_of(j, struct bch_fs, journal); u64 unwritten_seq; - bool ret = false; if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) return false; @@ -952,9 +931,10 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) if (c->journal.flushed_seq_ondisk >= start) return false; - spin_lock(&j->lock); + guard(spinlock)(&j->lock); + if (c->journal.flushed_seq_ondisk >= start) - goto out; + return false; for 
(unwritten_seq = journal_last_unwritten_seq(j); unwritten_seq < end; @@ -963,15 +943,12 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) /* journal flush already in flight, or flush requseted */ if (buf->must_flush) - goto out; + return false; buf->noflush = true; } - ret = true; -out: - spin_unlock(&j->lock); - return ret; + return true; } static int __bch2_journal_meta(struct journal *j) @@ -1010,19 +987,18 @@ int bch2_journal_meta(struct journal *j) void bch2_journal_unblock(struct journal *j) { - spin_lock(&j->lock); - if (!--j->blocked && - j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && - j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { - union journal_res_state old, new; - - old.v = atomic64_read(&j->reservations.counter); - do { - new.v = old.v; - new.cur_entry_offset = j->cur_entry_offset_if_blocked; - } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); - } - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + if (!--j->blocked && + j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && + j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { + union journal_res_state old, new; + + old.v = atomic64_read(&j->reservations.counter); + do { + new.v = old.v; + new.cur_entry_offset = j->cur_entry_offset_if_blocked; + } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); + } journal_wake(j); } @@ -1050,9 +1026,8 @@ static void __bch2_journal_block(struct journal *j) void bch2_journal_block(struct journal *j) { - spin_lock(&j->lock); - __bch2_journal_block(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + __bch2_journal_block(j); journal_quiesce(j); } @@ -1065,7 +1040,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou /* We're inside wait_event(), but using mutex_lock(: */ sched_annotate_sleep(); mutex_lock(&j->buf_lock); - spin_lock(&j->lock); + guard(spinlock)(&j->lock); max_seq = min(max_seq, 
journal_cur_seq(j)); for (u64 seq = journal_last_unwritten_seq(j); @@ -1088,11 +1063,12 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou ret = journal_state_count(s, idx & JOURNAL_STATE_BUF_MASK) > open ? ERR_PTR(-EAGAIN) : buf; + if (!ret) + smp_mb(); break; } } - spin_unlock(&j->lock); if (IS_ERR_OR_NULL(ret)) mutex_unlock(&j->buf_lock); return ret; @@ -1147,16 +1123,14 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, if (ret) break; - if (!new_fs) { - ret = bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, - ob[nr_got]->bucket, BCH_DATA_journal, - ca->mi.bucket_size, BTREE_TRIGGER_transactional)); - if (ret) { - bch2_open_bucket_put(c, ob[nr_got]); - bch_err_msg(c, ret, "marking new journal buckets"); - break; - } + CLASS(btree_trans, trans)(c); + ret = bch2_trans_mark_metadata_bucket(trans, ca, + ob[nr_got]->bucket, BCH_DATA_journal, + ca->mi.bucket_size, BTREE_TRIGGER_transactional); + if (ret) { + bch2_open_bucket_put(c, ob[nr_got]); + bch_err_msg(c, ret, "marking new journal buckets"); + break; } bu[nr_got] = ob[nr_got]->bucket; @@ -1226,12 +1200,13 @@ err_unblock: mutex_unlock(&c->sb_lock); } - if (ret && !new_fs) + if (ret) { + CLASS(btree_trans, trans)(c); for (i = 0; i < nr_got; i++) - bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, + bch2_trans_mark_metadata_bucket(trans, ca, bu[i], BCH_DATA_free, 0, - BTREE_TRIGGER_transactional)); + BTREE_TRIGGER_transactional); + } err_free: for (i = 0; i < nr_got; i++) bch2_open_bucket_put(c, ob[i]); @@ -1296,10 +1271,8 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, unsigned nr) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); - up_write(&c->state_lock); - bch_err_fn(c, ret); return ret; } @@ -1423,21 +1396,18 @@ int 
bch2_fs_journal_alloc(struct bch_fs *c) static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) { - bool ret = false; - u64 seq; + guard(spinlock)(&j->lock); - spin_lock(&j->lock); - for (seq = journal_last_unwritten_seq(j); - seq <= journal_cur_seq(j) && !ret; + for (u64 seq = journal_last_unwritten_seq(j); + seq <= journal_cur_seq(j); seq++) { struct journal_buf *buf = journal_seq_to_buf(j, seq); if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) - ret = true; + return true; } - spin_unlock(&j->lock); - return ret; + return false; } void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) @@ -1556,13 +1526,11 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) if (!had_entries) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ - spin_lock(&j->lock); - j->last_flush_write = jiffies; - - j->reservations.idx = journal_cur_seq(j); - - c->last_bucket_seq_cleanup = journal_cur_seq(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + j->last_flush_write = jiffies; + j->reservations.idx = journal_cur_seq(j); + c->last_bucket_seq_cleanup = journal_cur_seq(j); + } return 0; } @@ -1573,13 +1541,12 @@ void bch2_journal_set_replay_done(struct journal *j) * journal_space_available must happen before setting JOURNAL_running * JOURNAL_running must happen before JOURNAL_replay_done */ - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_space_available(j); set_bit(JOURNAL_need_flush_write, &j->flags); set_bit(JOURNAL_running, &j->flags); set_bit(JOURNAL_replay_done, &j->flags); - spin_unlock(&j->lock); } /* init/exit: */ @@ -1589,7 +1556,7 @@ void bch2_dev_journal_exit(struct bch_dev *ca) struct journal_device *ja = &ca->journal; for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { - kfree(ja->bio[i]); + kvfree(ja->bio[i]); ja->bio[i] = NULL; } @@ -1626,7 +1593,16 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb) unsigned nr_bvecs = 
DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE); for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) { - ja->bio[i] = kzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, + /* + * kvzalloc() is not what we want to be using here: + * JOURNAL_ENTRY_SIZE_MAX is probably quite a bit bigger than it + * needs to be. + * + * But changing that will require performance testing - + * performance can be sensitive to anything that affects journal + * pipelining. + */ + ja->bio[i] = kvzalloc(struct_size(ja->bio[i], bio.bi_inline_vecs, nr_bvecs), GFP_KERNEL); if (!ja->bio[i]) return bch_err_throw(c, ENOMEM_dev_journal_init); @@ -1727,9 +1703,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 28); - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); + s = READ_ONCE(j->reservations); prt_printf(out, "flags:\t"); @@ -1819,13 +1796,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); - - --out->atomic; } void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); __bch2_journal_debug_to_text(out, j); - spin_unlock(&j->lock); } diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 977907038d98..c05aa94237f8 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -267,7 +267,7 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u { union journal_res_state s; - s.v = atomic64_sub_return(((union journal_res_state) { + s.v = atomic64_sub_return_release(((union journal_res_state) { .buf0_count = idx == 0, .buf1_count = idx == 1, .buf2_count = idx == 2, @@ -297,9 +297,8 @@ static inline void bch2_journal_buf_put(struct journal *j, u64 seq) s = journal_state_buf_put(j, idx); if (!journal_state_count(s, idx)) { - spin_lock(&j->lock); + 
guard(spinlock)(&j->lock); bch2_journal_buf_put_final(j, seq); - spin_unlock(&j->lock); } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) wake_up(&j->wait); } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2d6ce4348a22..2835250a14c4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -35,7 +35,8 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *c) void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + for_each_member_device(c, ca) { struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); @@ -46,16 +47,14 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) if (offset <= ca->mi.bucket_size) ca->journal.sectors_free = ca->mi.bucket_size - offset; } - mutex_unlock(&c->sb_lock); } static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p) { - struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, p->dev); prt_printf(out, "%s %u:%u:%u (sector %llu)", ca ? ca->name : "(invalid dev)", p->dev, p->bucket, p->bucket_offset, p->sector); - bch2_dev_put(ca); } void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j) @@ -157,7 +156,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct journal_replay **_i, *i, *dup; size_t bytes = vstruct_bytes(j); u64 last_seq = !JSET_NO_FLUSH(j) ? 
le64_to_cpu(j->last_seq) : 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = JOURNAL_ENTRY_ADD_OK; if (last_seq && c->opts.journal_rewind) @@ -223,7 +222,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, ret = darray_push(&dup->ptrs, entry_ptr); if (ret) - goto out; + return ret; bch2_journal_replay_to_text(&buf, c, dup); @@ -240,7 +239,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (entry_ptr.csum_good && !identical) goto replace; - goto out; + return ret; } replace: i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); @@ -263,9 +262,7 @@ replace: } *_i = i; -out: fsck_err: - printbuf_exit(&buf); return ret; } @@ -312,7 +309,7 @@ static void journal_entry_err_msg(struct printbuf *out, #define journal_entry_err(c, version, jset, entry, _err, msg, ...) \ ({ \ - struct printbuf _buf = PRINTBUF; \ + CLASS(printbuf, _buf)(); \ \ journal_entry_err_msg(&_buf, version, jset, entry); \ prt_printf(&_buf, msg, ##__VA_ARGS__); \ @@ -331,7 +328,6 @@ static void journal_entry_err_msg(struct printbuf *out, break; \ } \ \ - printbuf_exit(&_buf); \ true; \ }) @@ -617,7 +613,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); int ret = 0; if (journal_entry_err_on(bytes < sizeof(*u) || @@ -626,7 +622,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); - goto out; + return 0; } if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err), @@ -634,11 +630,9 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: %s", err.buf)) { 
journal_entry_null_range(entry, vstruct_next(entry)); - goto out; + return 0; } -out: fsck_err: - printbuf_exit(&err); return ret; } @@ -1165,17 +1159,16 @@ reread: vstruct_end(j) - (void *) j->encrypted_start); bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)); - mutex_lock(&jlist->lock); - ret = journal_entry_add(c, ca, (struct journal_ptr) { - .csum_good = csum_good, - .csum = csum, - .dev = ca->dev_idx, - .bucket = bucket, - .bucket_offset = offset - - bucket_to_sector(ca, ja->buckets[bucket]), - .sector = offset, - }, jlist, j); - mutex_unlock(&jlist->lock); + scoped_guard(mutex, &jlist->lock) + ret = journal_entry_add(c, ca, (struct journal_ptr) { + .csum_good = csum_good, + .csum = csum, + .dev = ca->dev_idx, + .bucket = bucket, + .bucket_offset = offset - + bucket_to_sector(ca, ja->buckets[bucket]), + .sector = offset, + }, jlist, j); switch (ret) { case JOURNAL_ENTRY_ADD_OK: @@ -1235,16 +1228,15 @@ out: closure_return(cl); return; err: - mutex_lock(&jlist->lock); - jlist->ret = ret; - mutex_unlock(&jlist->lock); + scoped_guard(mutex, &jlist->lock) + jlist->ret = ret; goto out; } noinline_for_stack static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); @@ -1271,7 +1263,6 @@ static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_r prt_printf(&buf, "\n(had good copy on another device)"); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) @@ -1299,7 +1290,6 @@ struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u noinline_for_stack static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq) { - struct printbuf buf = PRINTBUF; int ret = 0; struct genradix_iter radix_iter; @@ 
-1318,7 +1308,7 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e struct u64_range missing; while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) { - printbuf_reset(&buf); + CLASS(printbuf, buf)(); prt_printf(&buf, "journal entries %llu-%llu missing! (replaying %llu-%llu)", missing.start, missing.end - 1, start_seq, end_seq); @@ -1342,7 +1332,6 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e seq = le64_to_cpu(i->j.seq) + 1; } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1354,7 +1343,6 @@ int bch2_journal_read(struct bch_fs *c, struct journal_list jlist; struct journal_replay *i, **_i; struct genradix_iter radix_iter; - struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; u64 seq; int ret = 0; @@ -1443,24 +1431,27 @@ int bch2_journal_read(struct bch_fs *c, return 0; } - printbuf_reset(&buf); - prt_printf(&buf, "journal read done, replaying entries %llu-%llu", - *last_seq, *blacklist_seq - 1); - - /* - * Drop blacklisted entries and entries older than last_seq (or start of - * journal rewind: - */ u64 drop_before = *last_seq; - if (c->opts.journal_rewind) { - drop_before = min(drop_before, c->opts.journal_rewind); - prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + { + CLASS(printbuf, buf)(); + prt_printf(&buf, "journal read done, replaying entries %llu-%llu", + *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind: + */ + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; + if (*start_seq != *blacklist_seq) + prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); + bch_info(c, "%s", buf.buf); } - *last_seq = drop_before; - if (*start_seq != *blacklist_seq) - prt_printf(&buf, 
" (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); - bch_info(c, "%s", buf.buf); genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1483,7 +1474,7 @@ int bch2_journal_read(struct bch_fs *c, ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) - goto err; + return ret; genradix_for_each(&c->journal_entries, radix_iter, _i) { union bch_replicas_padded replicas = { @@ -1512,14 +1503,14 @@ int bch2_journal_read(struct bch_fs *c, i->ptrs.data[0].sector, READ); if (ret) - goto err; + return ret; darray_for_each(i->ptrs, ptr) replicas_entry_add_dev(&replicas.e, ptr->dev); bch2_replicas_entry_sort(&replicas.e); - printbuf_reset(&buf); + CLASS(printbuf, buf)(); bch2_replicas_entry_to_text(&buf, &replicas.e); if (!degraded && @@ -1530,12 +1521,10 @@ int bch2_journal_read(struct bch_fs *c, le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) - goto err; + return ret; } } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -1695,10 +1684,10 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) memcpy(new_buf, buf->data, buf->buf_size); - spin_lock(&j->lock); - swap(buf->data, new_buf); - swap(buf->buf_size, new_size); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + swap(buf->data, new_buf); + swap(buf->buf_size, new_size); + } kvfree(new_buf); } @@ -1725,7 +1714,7 @@ static CLOSURE_CALLBACK(journal_write_done) } if (err && !bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); if (err == -BCH_ERR_journal_write_err) @@ -1737,7 +1726,6 @@ static CLOSURE_CALLBACK(journal_write_done) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } closure_debug_destroy(cl); @@ -1780,6 +1768,7 @@ static CLOSURE_CALLBACK(journal_write_done) closure_wake_up(&c->freelist_wait); bch2_reset_alloc_cursors(c); + do_discards = true; } j->seq_ondisk = seq; @@ 
-1878,7 +1867,11 @@ static CLOSURE_CALLBACK(journal_write_submit) jbio->submit_time = local_clock(); - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); + /* + * blk-wbt.c throttles all writes except those that have both + * REQ_SYNC and REQ_IDLE set... + */ + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META); bio->bi_iter.bi_sector = ptr->offset; bio->bi_end_io = journal_write_endio; bio->bi_private = ca; @@ -2018,9 +2011,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) } } - spin_lock(&c->journal.lock); - w->need_flush_to_write_buffer = false; - spin_unlock(&c->journal.lock); + scoped_guard(spinlock, &c->journal.lock) + w->need_flush_to_write_buffer = false; start = end = vstruct_last(jset); @@ -2158,21 +2150,21 @@ CLOSURE_CALLBACK(bch2_journal_write) j->write_start_time = local_clock(); - spin_lock(&j->lock); - if (nr_rw_members > 1) - w->separate_flush = true; + scoped_guard(spinlock, &j->lock) { + if (nr_rw_members > 1) + w->separate_flush = true; - ret = bch2_journal_write_pick_flush(j, w); - spin_unlock(&j->lock); + ret = bch2_journal_write_pick_flush(j, w); + } if (unlikely(ret)) goto err; - mutex_lock(&j->buf_lock); - journal_buf_realloc(j, w); + scoped_guard(mutex, &j->buf_lock) { + journal_buf_realloc(j, w); - ret = bch2_journal_write_prep(j, w); - mutex_unlock(&j->buf_lock); + ret = bch2_journal_write_prep(j, w); + } if (unlikely(ret)) goto err; @@ -2193,22 +2185,22 @@ CLOSURE_CALLBACK(bch2_journal_write) if (unlikely(ret)) goto err; - spin_lock(&j->lock); - /* - * write is allocated, no longer need to account for it in - * bch2_journal_space_available(): - */ - w->sectors = 0; - w->write_allocated = true; - j->entry_bytes_written += vstruct_bytes(w->data); + scoped_guard(spinlock, &j->lock) { + /* + * write is allocated, no longer need to account for it in + * bch2_journal_space_available(): + */ + w->sectors = 0; + w->write_allocated = true; + j->entry_bytes_written += 
vstruct_bytes(w->data); - /* - * journal entry has been compacted and allocated, recalculate space - * available: - */ - bch2_journal_space_available(j); - bch2_journal_do_writes(j); - spin_unlock(&j->lock); + /* + * journal entry has been compacted and allocated, recalculate space + * available: + */ + bch2_journal_space_available(j); + bch2_journal_do_writes(j); + } w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); @@ -2232,7 +2224,7 @@ CLOSURE_CALLBACK(bch2_journal_write) return; err_allocate_write: if (!bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_journal_debug_to_text(&buf, j); prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), @@ -2240,7 +2232,6 @@ err_allocate_write: vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } err: bch2_fatal_error(c); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index cd6201741c59..f23e5ee9ad75 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -170,6 +170,12 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne return (struct journal_space) { 0, 0 }; /* + * It's possible for bucket size to be misaligned w.r.t. 
the filesystem + * block size: + */ + min_bucket_size = round_down(min_bucket_size, block_sectors(c)); + + /* * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ @@ -215,8 +221,8 @@ void bch2_journal_space_available(struct journal *j) if (nr_online < metadata_replicas_required(c)) { if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" "rw journal devs:", nr_online, metadata_replicas_required(c)); @@ -224,7 +230,6 @@ void bch2_journal_space_available(struct journal *j) prt_printf(&buf, " %s", ca->name); bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); } ret = bch_err_throw(c, insufficient_journal_devices); goto out; @@ -274,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct journal_device *ja static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - spin_lock(&j->lock); - bool ret = __should_discard_bucket(j, ja); - spin_unlock(&j->lock); - - return ret; + guard(spinlock)(&j->lock); + return __should_discard_bucket(j, ja); } /* @@ -289,7 +291,7 @@ void bch2_journal_do_discards(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - mutex_lock(&j->discard_lock); + guard(mutex)(&j->discard_lock); for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) { struct journal_device *ja = &ca->journal; @@ -303,15 +305,12 @@ void bch2_journal_do_discards(struct journal *j) ja->buckets[ja->discard_idx]), ca->mi.bucket_size, GFP_NOFS); - spin_lock(&j->lock); - ja->discard_idx = (ja->discard_idx + 1) % ja->nr; - - bch2_journal_space_available(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + ja->discard_idx = (ja->discard_idx + 1) % ja->nr; + bch2_journal_space_available(j); + } } } - - mutex_unlock(&j->discard_lock); } /* @@ 
-352,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq) void bch2_journal_pin_put(struct journal *j, u64 seq) { if (__bch2_journal_pin_put(j, seq)) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } } @@ -387,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j, void bch2_journal_pin_drop(struct journal *j, struct journal_entry_pin *pin) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); if (__journal_pin_drop(j, pin)) bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, @@ -437,7 +434,7 @@ void bch2_journal_pin_copy(struct journal *j, struct journal_entry_pin *src, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); u64 seq = READ_ONCE(src->seq); @@ -448,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j, * longer to exist, but that means there's no longer anything to * copy and we can bail out here: */ - spin_unlock(&j->lock); return; } @@ -465,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j, */ if (seq == journal_last_seq(j)) journal_wake(j); - spin_unlock(&j->lock); } void bch2_journal_pin_set(struct journal *j, u64 seq, struct journal_entry_pin *pin, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + bool wake; - BUG_ON(seq < journal_last_seq(j)); + scoped_guard(spinlock, &j->lock) { + BUG_ON(seq < journal_last_seq(j)); - bool reclaim = __journal_pin_drop(j, pin); + bool reclaim = __journal_pin_drop(j, pin); - bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); - if (reclaim) - bch2_journal_reclaim_fast(j); - /* - * If the journal is currently full, we might want to call flush_fn - * immediately: - */ - if (seq == journal_last_seq(j)) - journal_wake(j); + if (reclaim) + bch2_journal_reclaim_fast(j); + /* + * If 
the journal is currently full, we might want to call flush_fn + * immediately: + */ + wake = seq == journal_last_seq(j); + } - spin_unlock(&j->lock); + if (wake) + journal_wake(j); } /** @@ -574,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j, j->last_flushed = jiffies; - spin_lock(&j->lock); - pin = journal_get_next_pin(j, seq_to_flush, - allowed_below, - allowed_above, &seq); - if (pin) { - BUG_ON(j->flush_in_progress); - j->flush_in_progress = pin; - j->flush_in_progress_dropped = false; - flush_fn = pin->flush; + scoped_guard(spinlock, &j->lock) { + pin = journal_get_next_pin(j, seq_to_flush, + allowed_below, + allowed_above, &seq); + if (pin) { + BUG_ON(j->flush_in_progress); + j->flush_in_progress = pin; + j->flush_in_progress_dropped = false; + flush_fn = pin->flush; + } } - spin_unlock(&j->lock); if (!pin) break; @@ -597,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j, err = flush_fn(j, pin, seq); - spin_lock(&j->lock); - /* Pin might have been dropped or rearmed: */ - if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); - j->flush_in_progress = NULL; - j->flush_in_progress_dropped = false; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + /* Pin might have been dropped or rearmed: */ + if (likely(!err && !j->flush_in_progress_dropped)) + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); + j->flush_in_progress = NULL; + j->flush_in_progress_dropped = false; + } wake_up(&j->pin_flush_wait); @@ -764,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg) j->reclaim_kicked = false; - mutex_lock(&j->reclaim_lock); - ret = __bch2_journal_reclaim(j, false, kicked); - mutex_unlock(&j->reclaim_lock); + scoped_guard(mutex, &j->reclaim_lock) + ret = __bch2_journal_reclaim(j, false, kicked); now = jiffies; delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); @@ -782,9 +778,8 @@ 
static int bch2_journal_reclaim_thread(void *arg) if (j->reclaim_kicked) break; - spin_lock(&j->lock); - journal_empty = fifo_empty(&j->pin); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + journal_empty = fifo_empty(&j->pin); long timeout = j->next_reclaim - jiffies; @@ -838,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j) static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, unsigned types) { + guard(spinlock)(&j->lock); + struct journal_entry_pin_list *pin_list; u64 seq; - - spin_lock(&j->lock); fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { if (seq > seq_to_flush) break; @@ -849,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) if ((BIT(i) & types) && (!list_empty(&pin_list->unflushed[i]) || - !list_empty(&pin_list->flushed[i]))) { - spin_unlock(&j->lock); + !list_empty(&pin_list->flushed[i]))) return true; - } } - spin_unlock(&j->lock); return false; } @@ -875,32 +867,54 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, if (ret) return ret; - mutex_lock(&j->reclaim_lock); + guard(mutex)(&j->reclaim_lock); for (int type = JOURNAL_PIN_TYPE_NR - 1; type >= 0; --type) if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { *did_work = true; - goto unlock; + + /* + * Question from Dan Carpenter, on the early return: + * + * If journal_flush_pins_or_still_flushing() returns + * true, then the flush hasn't complete and we must + * return 0; we want the outer closure_wait_event() in + * journal_flush_pins() to continue. + * + * The early return is there because we don't want to + * call journal_entry_close() until we've finished + * flushing all outstanding journal pins - otherwise + * seq_to_flush can be U64_MAX, and we'll close a bunch + * of journal entries and write tiny ones completely + * unnecessarily. 
+ * + * Having the early return be in the loop where we loop + * over types is important, because flushing one journal + * pin can cause new journal pins to be added (even of + * the same type, btree node writes may generate more + * btree node writes, when updating the parent pointer + * has a full node and has to trigger a split/compact). + * + * This is part of our shutdown sequence, where order of + * flushing is important in order to make sure that it + * terminates... + */ + return 0; } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); - spin_lock(&j->lock); /* * If journal replay hasn't completed, the unreplayed journal entries * hold refs on their corresponding sequence numbers */ + guard(spinlock)(&j->lock); ret = !test_bit(JOURNAL_replay_done, &j->flags) || journal_last_seq(j) > seq_to_flush || !fifo_used(&j->pin); - - spin_unlock(&j->lock); -unlock: - mutex_unlock(&j->reclaim_lock); - return ret; } @@ -925,13 +939,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) u64 iter, seq = 0; int ret = 0; - spin_lock(&j->lock); - fifo_for_each_entry_ptr(p, &j->pin, iter) - if (dev_idx >= 0 - ? bch2_dev_list_has_dev(p->devs, dev_idx) - : p->devs.nr < c->opts.metadata_replicas) - seq = iter; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + fifo_for_each_entry_ptr(p, &j->pin, iter) + if (dev_idx >= 0 + ? 
bch2_dev_list_has_dev(p->devs, dev_idx) + : p->devs.nr < c->opts.metadata_replicas) + seq = iter; bch2_journal_flush_pins(j, seq); @@ -939,7 +952,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) if (ret) return ret; - mutex_lock(&c->replicas_gc_lock); + guard(mutex)(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); /* @@ -954,29 +967,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) goto err; seq = 0; - spin_lock(&j->lock); - while (!ret) { - union bch_replicas_padded replicas; + scoped_guard(spinlock, &j->lock) + while (!ret) { + union bch_replicas_padded replicas; - seq = max(seq, journal_last_seq(j)); - if (seq >= j->pin.back) - break; - bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, - journal_seq_pin(j, seq)->devs); - seq++; + seq = max(seq, journal_last_seq(j)); + if (seq >= j->pin.back) + break; + bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, + journal_seq_pin(j, seq)->devs); + seq++; - if (replicas.e.nr_devs) { - spin_unlock(&j->lock); - ret = bch2_mark_replicas(c, &replicas.e); - spin_lock(&j->lock); + if (replicas.e.nr_devs) { + spin_unlock(&j->lock); + ret = bch2_mark_replicas(c, &replicas.e); + spin_lock(&j->lock); + } } - } - spin_unlock(&j->lock); err: - ret = bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); - - return ret; + return bch2_replicas_gc_end(c, ret); } bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) @@ -984,20 +993,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - spin_lock(&j->lock); - if (!test_bit(JOURNAL_running, &j->flags)) { - spin_unlock(&j->lock); + guard(spinlock)(&j->lock); + guard(printbuf_atomic)(out); + + if (!test_bit(JOURNAL_running, &j->flags)) return true; - } *seq = max(*seq, j->pin.front); - if (*seq >= j->pin.back) { - spin_unlock(&j->lock); + if (*seq >= j->pin.back) 
return true; - } - - out->atomic++; pin_list = journal_seq_pin(j, *seq); @@ -1016,9 +1021,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 printbuf_indent_sub(out, 2); - --out->atomic; - spin_unlock(&j->lock); - return false; } diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index 6361809b5e2e..399db5b77d9f 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -49,7 +49,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) unsigned i = 0, nr; int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); nr = blacklist_nr_entries(bl); @@ -77,10 +77,8 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, sb_blacklist_u64s(nr + 1)); - if (!bl) { - ret = bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); - goto out; - } + if (!bl) + return bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); array_insert_item(bl->start, nr, i, ((struct journal_seq_blacklist_entry) { .start = cpu_to_le64(start), @@ -89,8 +87,6 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3); ret = bch2_write_super(c); -out: - mutex_unlock(&c->sb_lock); return ret ?: bch2_blacklist_table_initialize(c); } diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 75f27ec26f85..0367ea37e857 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -35,7 +35,7 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, { struct bch_fs *c = trans->c; u32 restart_count = trans->restart_count; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags), @@ -56,21 +56,18 @@ static int 
resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_exit(&sk, c); fsck_err: - printbuf_exit(&buf); return ret ?: trans_was_restarted(trans, restart_count); } int bch2_resume_logged_ops(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_max(trans, iter, BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, 0), POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), BTREE_ITER_prefetch, k, - resume_logged_op(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + resume_logged_op(trans, &iter, k)); } static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) @@ -107,12 +104,11 @@ int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) */ if (ret) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); bch2_fs_fatal_error(c, "deleting logged operation %s: %s", buf.buf, bch2_err_str(ret)); - printbuf_exit(&buf); } return ret; diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 57b5b3263b08..ee14656c3fdd 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -86,7 +86,7 @@ int bch2_lru_check_set(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter lru_iter; struct bkey_s_c lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, @@ -112,7 +112,6 @@ int bch2_lru_check_set(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &lru_iter); - printbuf_exit(&buf); return ret; } @@ -166,8 +165,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); struct bbpos bp = lru_pos_to_bp(lru_k); @@ -198,8 +197,6 @@ static int 
bch2_check_lru_key(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -210,14 +207,13 @@ int bch2_check_lrus(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_lru_key(trans, &iter, k, &last_flushed))); + bch2_check_lru_key(trans, &iter, k, &last_flushed)); bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index f296cce95338..bd1e54e0efd5 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -119,34 +119,29 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, struct progress_indicator_state *progress, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans = bch2_trans_get(c); - enum btree_id id; - int ret = 0; + CLASS(btree_trans, trans)(c); - for (id = 0; id < BTREE_ID_NR; id++) { + for (unsigned id = 0; id < BTREE_ID_NR; id++) { if (!btree_type_has_ptrs(id)) continue; - ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, + int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ bch2_progress_update_iter(trans, progress, &iter, "dropping user data"); bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); })); if (ret) - break; + return ret; } - bch2_trans_put(trans); - - return ret; + return 0; } static int bch2_dev_metadata_drop(struct bch_fs *c, struct progress_indicator_state *progress, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans; struct btree_iter iter; struct closure cl; struct btree *b; @@ -158,7 +153,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, 
if (flags & BCH_FORCE_IF_METADATA_LOST) return bch_err_throw(c, remove_with_metadata_missing_unimplemented); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); bch2_bkey_buf_init(&k); closure_init_stack(&cl); @@ -199,7 +194,6 @@ next: ret = 0; err: bch2_bkey_buf_exit(&k, c); - bch2_trans_put(trans); BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); @@ -240,7 +234,7 @@ out: int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf last_flushed; bch2_bkey_buf_init(&last_flushed); @@ -260,7 +254,6 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig })); bch2_bkey_buf_exit(&last_flushed, trans->c); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index eec591e947bd..3f44bb54f91a 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -54,22 +54,20 @@ trace_io_move2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); trace_io_move(c, buf.buf); - printbuf_exit(&buf); } static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); trace_io_move_read(c, buf.buf); - printbuf_exit(&buf); } static noinline void @@ -78,7 +76,7 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, struct data_update_opts *data_opts, move_pred_fn pred, void *_arg, bool p) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "%ps: %u", pred, p); @@ -92,7 +90,6 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, prt_newline(&buf); bch2_data_update_opts_to_text(&buf, c, io_opts, 
data_opts); trace_io_move_pred(c, buf.buf); - printbuf_exit(&buf); } static noinline void @@ -128,10 +125,9 @@ static void move_free(struct moving_io *io) if (io->b) atomic_dec(&io->b->count); - mutex_lock(&ctxt->lock); - list_del(&io->io_list); + scoped_guard(mutex, &ctxt->lock) + list_del(&io->io_list); wake_up(&ctxt->wait); - mutex_unlock(&ctxt->lock); if (!io->write.data_opts.scrub) { bch2_data_update_exit(&io->write); @@ -150,11 +146,9 @@ static void move_write_done(struct bch_write_op *op) if (op->error) { if (trace_io_move_write_fail_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_write_op_to_text(&buf, op); trace_io_move_write_fail(c, buf.buf); - printbuf_exit(&buf); } this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); @@ -203,11 +197,9 @@ static void move_write(struct moving_io *io) } if (trace_io_move_write_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); trace_io_move_write(c, buf.buf); - printbuf_exit(&buf); } closure_get(&io->write.ctxt->cl); @@ -276,9 +268,8 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_sectors)); EBUG_ON(atomic_read(&ctxt->read_ios)); - mutex_lock(&c->moving_context_lock); - list_del(&ctxt->list); - mutex_unlock(&c->moving_context_lock); + scoped_guard(mutex, &c->moving_context_lock) + list_del(&ctxt->list); /* * Generally, releasing a transaction within a transaction restart means @@ -314,9 +305,8 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, INIT_LIST_HEAD(&ctxt->ios); init_waitqueue_head(&ctxt->wait); - mutex_lock(&c->moving_context_lock); - list_add(&ctxt->list, &c->moving_context_list); - mutex_unlock(&c->moving_context_lock); + scoped_guard(mutex, &c->moving_context_lock) + list_add(&ctxt->list, &c->moving_context_list); } void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) @@ -412,13 +402,13 @@ int bch2_move_extent(struct 
moving_context *ctxt, if (trace_io_move_read_enabled()) trace_io_move_read2(c, k); - mutex_lock(&ctxt->lock); - atomic_add(io->read_sectors, &ctxt->read_sectors); - atomic_inc(&ctxt->read_ios); + scoped_guard(mutex, &ctxt->lock) { + atomic_add(io->read_sectors, &ctxt->read_sectors); + atomic_inc(&ctxt->read_ios); - list_add_tail(&io->read_list, &ctxt->reads); - list_add_tail(&io->io_list, &ctxt->ios); - mutex_unlock(&ctxt->lock); + list_add_tail(&io->read_list, &ctxt->reads); + list_add_tail(&io->io_list, &ctxt->ios); + } /* * dropped by move_read_endio() - guards against use after free of @@ -443,13 +433,11 @@ err: count_event(c, io_move_start_fail); if (trace_io_move_start_fail_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, ": "); prt_str(&buf, bch2_err_str(ret)); trace_io_move_start_fail(c, buf.buf); - printbuf_exit(&buf); } if (bch2_err_matches(ret, BCH_ERR_data_update_done)) @@ -468,7 +456,7 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; int ret = 0; - if (extent_iter->min_depth) + if (btree_iter_path(trans, extent_iter)->level) return opts_ret; if (extent_k.k->type == KEY_TYPE_reflink_v) @@ -672,8 +660,7 @@ retry_root: k = bkey_i_to_s_c(&b->key); - io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, - iter.pos, &iter, k); + io_opts = &snapshot_io_opts.fs_io_opts; ret = PTR_ERR_OR_ZERO(io_opts); if (ret) goto root_err; @@ -875,7 +862,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, u64 check_mismatch_done = bucket_start; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, dev); + CLASS(bch2_dev_tryget, ca)(c, dev); if (!ca) return 0; @@ -1014,7 +1001,6 @@ err: bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); return ret; } @@ -1031,9 +1017,9 @@ int bch2_move_data_phys(struct bch_fs *c, { struct 
moving_context ctxt; - bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + bch2_btree_write_buffer_flush_sync(ctxt.trans); + if (ctxt.stats) { ctxt.stats->phys = true; ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; @@ -1268,12 +1254,11 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) BBPOS_MAX, rewrite_old_nodes_pred, c, stats); if (!ret) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); c->disk_sb.sb->version_min = c->disk_sb.sb->version; bch2_write_super(c); - mutex_unlock(&c->sb_lock); } bch_err_fn(c, ret); @@ -1343,18 +1328,18 @@ static bool scrub_pred(struct bch_fs *c, void *_arg, int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, - struct bch_ioctl_data op) + struct bch_ioctl_data *op) { - struct bbpos start = BBPOS(op.start_btree, op.start_pos); - struct bbpos end = BBPOS(op.end_btree, op.end_pos); + struct bbpos start = BBPOS(op->start_btree, op->start_pos); + struct bbpos end = BBPOS(op->end_btree, op->end_pos); int ret = 0; - if (op.op >= BCH_DATA_OP_NR) + if (op->op >= BCH_DATA_OP_NR) return -EINVAL; - bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); + bch2_move_stats_init(stats, bch2_data_ops_strs[op->op]); - switch (op.op) { + switch (op->op) { case BCH_DATA_OP_scrub: /* * prevent tests from spuriously failing, make sure we see all @@ -1362,13 +1347,13 @@ int bch2_data_job(struct bch_fs *c, */ bch2_btree_interior_updates_flush(c); - ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX, - op.scrub.data_types, + ret = bch2_move_data_phys(c, op->scrub.dev, 0, U64_MAX, + op->scrub.data_types, NULL, stats, writepoint_hashed((unsigned long) current), false, - scrub_pred, &op) ?: ret; + scrub_pred, op) ?: ret; break; case 
BCH_DATA_OP_rereplicate: @@ -1385,18 +1370,18 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_migrate: - if (op.migrate.dev >= c->sb.nr_devices) + if (op->migrate.dev >= c->sb.nr_devices) return -EINVAL; stats->data_type = BCH_DATA_journal; - ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX, + ret = bch2_journal_flush_device_pins(&c->journal, op->migrate.dev); + ret = bch2_move_data_phys(c, op->migrate.dev, 0, U64_MAX, ~0, NULL, stats, writepoint_hashed((unsigned long) current), true, - migrate_pred, &op) ?: ret; + migrate_pred, op) ?: ret; bch2_btree_interior_updates_flush(c); ret = bch2_replicas_gc2(c) ?: ret; break; @@ -1468,11 +1453,11 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str printbuf_indent_add(out, 2); - mutex_lock(&ctxt->lock); - struct moving_io *io; - list_for_each_entry(io, &ctxt->ios, io_list) - bch2_data_update_inflight_to_text(out, &io->write); - mutex_unlock(&ctxt->lock); + scoped_guard(mutex, &ctxt->lock) { + struct moving_io *io; + list_for_each_entry(io, &ctxt->ios, io_list) + bch2_data_update_inflight_to_text(out, &io->write); + } printbuf_indent_sub(out, 4); } @@ -1481,10 +1466,9 @@ void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) { struct moving_context *ctxt; - mutex_lock(&c->moving_context_lock); - list_for_each_entry(ctxt, &c->moving_context_list, list) - bch2_moving_ctxt_to_text(out, c, ctxt); - mutex_unlock(&c->moving_context_lock); + scoped_guard(mutex, &c->moving_context_lock) + list_for_each_entry(ctxt, &c->moving_context_list, list) + bch2_moving_ctxt_to_text(out, c, ctxt); } void bch2_fs_move_init(struct bch_fs *c) diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 86b80499ac55..fe92ca6d418d 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -152,7 +152,7 @@ int bch2_evacuate_bucket(struct moving_context *, struct 
data_update_opts); int bch2_data_job(struct bch_fs *, struct bch_move_stats *, - struct bch_ioctl_data); + struct bch_ioctl_data *); void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 27e68d470ad0..9192b1fc3594 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -71,7 +71,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; - struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + CLASS(bch2_dev_bucket_tryget, ca)(c, k.k->p); if (!ca) goto out; @@ -90,7 +90,6 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, ret = lru_idx && lru_idx <= time; out: - bch2_dev_put(ca); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -320,8 +319,8 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) bch2_printbuf_make_room(out, 4096); struct task_struct *t; - out->atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(out); prt_printf(out, "Currently calculated wait:\n"); for_each_rw_member_rcu(c, ca) { prt_printf(out, " %s:\t", ca->name); @@ -333,7 +332,6 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) if (t) get_task_struct(t); } - --out->atomic; if (t) { bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 3e2b41babc26..8fa108880f58 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -99,7 +99,9 @@ int bch2_create_trans(struct btree_trans *trans, * If we're not root, we have to own the subvolume being * snapshotted: */ - if (uid && new_inode->bi_uid != uid) { + if (uid && + !capable(CAP_FOWNER) && + new_inode->bi_uid != uid) { ret = -EPERM; goto err; } @@ -727,7 +729,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, bool in_fsck) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct 
btree_iter bp_iter = {}; int ret = 0; @@ -835,7 +837,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &bp_iter); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -847,7 +848,7 @@ int __bch2_check_dirent_target(struct btree_trans *trans, bool in_fsck) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck); @@ -882,7 +883,6 @@ int __bch2_check_dirent_target(struct btree_trans *trans, } err: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -940,7 +940,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, snapshot_id_list *snapshot_overwrites, bool *do_update) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool repairing_parents = false; int ret = 0; @@ -967,7 +967,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot, snapshot_overwrites, &buf); if (ret) - goto err; + return ret; if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) { inode->bi_flags |= BCH_INODE_has_case_insensitive; @@ -986,14 +986,14 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, if (dir.bi_parent_subvol) { ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot); if (ret) - goto err; + return ret; snapshot_overwrites = NULL; } ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0); if (ret) - goto err; + return ret; if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) { prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n"); @@ -1001,13 +1001,13 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot, snapshot_overwrites, &buf); if (ret) - goto err; + return ret; if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", 
buf.buf)) { dir.bi_flags |= BCH_INODE_has_case_insensitive; ret = __bch2_fsck_write_inode(trans, &dir); if (ret) - goto err; + return ret; } } @@ -1019,9 +1019,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, break; } out: -err: fsck_err: - printbuf_exit(&buf); if (ret) return ret; diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c index 962218fa68ec..58cfd540c6d6 100644 --- a/fs/bcachefs/nocow_locking.c +++ b/fs/bcachefs/nocow_locking.c @@ -47,7 +47,7 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, int v, lock_val = flags ? 1 : -1; unsigned i; - spin_lock(&l->lock); + guard(spinlock)(&l->lock); for (i = 0; i < ARRAY_SIZE(l->b); i++) if (l->b[i] == dev_bucket) @@ -58,21 +58,19 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, l->b[i] = dev_bucket; goto take_lock; } -fail: - spin_unlock(&l->lock); + return false; got_entry: v = atomic_read(&l->l[i]); if (lock_val > 0 ? v < 0 : v > 0) - goto fail; + return false; take_lock: v = atomic_read(&l->l[i]); /* Overflow? 
*/ if (v && sign(v + lock_val) != sign(v)) - goto fail; + return false; atomic_add(lock_val, &l->l[i]); - spin_unlock(&l->lock); return true; } diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index b1cf88905b81..921f9049912d 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -584,7 +584,7 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, break; case Opt_discard: if (!ca) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); for_each_member_device(c, ca) { struct bch_member *m = bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx); @@ -592,7 +592,6 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, } bch2_write_super(c); - mutex_unlock(&c->sb_lock); } break; case Opt_version_upgrade: @@ -613,7 +612,6 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, struct printbuf *parse_later, const char *name, const char *val) { - struct printbuf err = PRINTBUF; u64 v; int ret, id; @@ -638,46 +636,36 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, val = bch2_opt_val_synonym_lookup(name, val); if (!(bch2_opt_table[id].flags & OPT_MOUNT)) - goto bad_opt; + return -BCH_ERR_option_name; if (id == Opt_acl && !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) - goto bad_opt; + return -BCH_ERR_option_name; if ((id == Opt_usrquota || id == Opt_grpquota) && !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) - goto bad_opt; + return -BCH_ERR_option_name; + CLASS(printbuf, err)(); ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); if (ret == -BCH_ERR_option_needs_open_fs) { - ret = 0; - if (parse_later) { prt_printf(parse_later, "%s=%s,", name, val); if (parse_later->allocation_failure) - ret = -ENOMEM; + return -ENOMEM; } - goto out; + return 0; } if (ret < 0) - goto bad_val; + return -BCH_ERR_option_value; if (opts) bch2_opt_set_by_id(opts, id, v); - ret = 0; -out: - printbuf_exit(&err); - return ret; -bad_opt: - ret = -BCH_ERR_option_name; - goto out; -bad_val: - ret = 
-BCH_ERR_option_value; - goto out; + return 0; } int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, @@ -805,11 +793,10 @@ bool __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, const struct bch_option *opt, u64 v) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? ca->dev_idx : -1, opt, v); if (changed) bch2_write_super(c); - mutex_unlock(&c->sb_lock); return changed; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 63f8e254495c..84ce69a7f131 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -150,12 +150,12 @@ enum fsck_err_opts { NULL, "Number of consecutive write errors allowed before kicking out a device")\ x(metadata_replicas, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_META_REPLICAS_WANT, 1, \ "#", "Number of metadata replicas") \ x(data_replicas, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_DATA_REPLICAS_WANT, 1, \ "#", "Number of data replicas") \ x(metadata_replicas_required, u8, \ @@ -165,7 +165,7 @@ enum fsck_err_opts { "#", NULL) \ x(data_replicas_required, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_DATA_REPLICAS_REQ, 1, \ "#", NULL) \ x(encoded_extent_max, u32, \ @@ -529,7 +529,7 @@ enum fsck_err_opts { "size", "Specifies the bucket size; must be greater than the btree node size")\ x(durability, u8, \ OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS, \ - OPT_UINT(0, BCH_REPLICAS_MAX), \ + OPT_UINT(0, BCH_REPLICAS_MAX + 1), \ BCH_MEMBER_DURABILITY, 1, \ "n", "Data written to this device will be considered\n"\ "to have already been replicated n times") \ diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 8f4e28d440ac..907e5c97550b 100644 
--- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -295,4 +295,8 @@ static inline void printbuf_atomic_dec(struct printbuf *buf) buf->atomic--; } +DEFINE_GUARD(printbuf_atomic, struct printbuf *, + printbuf_atomic_inc(_T), + printbuf_atomic_dec(_T)); + #endif /* _BCACHEFS_PRINTBUF_H */ diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index d09898566abe..42353067ba28 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -46,7 +46,7 @@ void bch2_progress_update_iter(struct btree_trans *trans, s->last_node = b; if (progress_update_p(s)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned percent = s->nodes_total ? div64_u64(s->nodes_seen * 100, s->nodes_total) : 0; @@ -56,6 +56,5 @@ void bch2_progress_update_iter(struct btree_trans *trans, bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); } } diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index f241efb1fb50..5f1eff591b29 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -394,12 +394,10 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, dq = bkey_s_c_to_quota(k); q = &c->quotas[k.k->p.inode]; - mutex_lock(&q->lock); + guard(mutex)(&q->lock); mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); - if (!mq) { - mutex_unlock(&q->lock); + if (!mq) return -ENOMEM; - } for (i = 0; i < Q_COUNTERS; i++) { mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); @@ -414,8 +412,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, mq->c[Q_INO].timer = qdq->d_ino_timer; if (qdq && qdq->d_fieldmask & QC_INO_WARNS) mq->c[Q_INO].warns = qdq->d_ino_warns; - - mutex_unlock(&q->lock); } return 0; @@ -522,24 +518,21 @@ advance: int bch2_fs_quota_read(struct bch_fs *c) { + scoped_guard(mutex, &c->sb_lock) { + struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + if (!sb_quota) + return bch_err_throw(c, ENOSPC_sb_quota); - 
mutex_lock(&c->sb_lock); - struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); - if (!sb_quota) { - mutex_unlock(&c->sb_lock); - return bch_err_throw(c, ENOSPC_sb_quota); + bch2_sb_quota_read(c); } - bch2_sb_quota_read(c); - mutex_unlock(&c->sb_lock); - - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, BTREE_ITER_prefetch, k, __bch2_quota_set(c, k, NULL)) ?: for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - bch2_fs_quota_read_inode(trans, &iter, k))); + bch2_fs_quota_read_inode(trans, &iter, k)); bch_err_fn(c, ret); return ret; } @@ -550,7 +543,6 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) { struct bch_fs *c = sb->s_fs_info; struct bch_sb_field_quota *sb_quota; - int ret = 0; if (sb->s_flags & SB_RDONLY) return -EROFS; @@ -569,11 +561,12 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); if (!sb_quota) { - ret = bch_err_throw(c, ENOSPC_sb_quota); - goto unlock; + int ret = bch_err_throw(c, ENOSPC_sb_quota); + bch_err_fn(c, ret); + return ret; } if (uflags & FS_QUOTA_UDQ_ENFD) @@ -586,10 +579,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); - - return bch2_err_class(ret); + return 0; } static int bch2_quota_disable(struct super_block *sb, unsigned uflags) @@ -599,7 +589,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) if (sb->s_flags & SB_RDONLY) return -EROFS; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (uflags & FS_QUOTA_UDQ_ENFD) 
SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); @@ -610,8 +600,6 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return 0; } @@ -700,14 +688,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type, { struct bch_fs *c = sb->s_fs_info; struct bch_sb_field_quota *sb_quota; - int ret = 0; if (0) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); qc_info_to_text(&buf, info); pr_info("setting:\n%s", buf.buf); - printbuf_exit(&buf); } if (sb->s_flags & SB_RDONLY) @@ -723,11 +709,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type, ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); if (!sb_quota) { - ret = bch_err_throw(c, ENOSPC_sb_quota); - goto unlock; + int ret = bch_err_throw(c, ENOSPC_sb_quota); + bch_err_fn(c, ret); + return bch2_err_class(ret); } if (info->i_fieldmask & QC_SPC_TIMER) @@ -749,10 +736,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type, bch2_sb_quota_read(c); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); - - return bch2_err_class(ret); + return 0; } /* Get/set individual quotas: */ @@ -778,15 +762,13 @@ static int bch2_get_quota(struct super_block *sb, struct kqid kqid, struct bch_fs *c = sb->s_fs_info; struct bch_memquota_type *q = &c->quotas[kqid.type]; qid_t qid = from_kqid(&init_user_ns, kqid); - struct bch_memquota *mq; memset(qdq, 0, sizeof(*qdq)); - mutex_lock(&q->lock); - mq = genradix_ptr(&q->table, qid); + guard(mutex)(&q->lock); + struct bch_memquota *mq = genradix_ptr(&q->table, qid); if (mq) __bch2_quota_get(qdq, mq); - mutex_unlock(&q->lock); return 0; } @@ -799,21 +781,17 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, qid_t qid = from_kqid(&init_user_ns, *kqid); struct genradix_iter iter; 
struct bch_memquota *mq; - int ret = 0; - mutex_lock(&q->lock); + guard(mutex)(&q->lock); genradix_for_each_from(&q->table, iter, mq, qid) if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { __bch2_quota_get(qdq, mq); *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); - goto found; + return 0; } - ret = -ENOENT; -found: - mutex_unlock(&q->lock); - return bch2_err_class(ret); + return -ENOENT; } static int bch2_set_quota_trans(struct btree_trans *trans, @@ -821,12 +799,10 @@ static int bch2_set_quota_trans(struct btree_trans *trans, struct qc_dqblk *qdq) { struct btree_iter iter; - struct bkey_s_c k; - int ret; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, - BTREE_ITER_slots|BTREE_ITER_intent); - ret = bkey_err(k); + struct bkey_s_c k = + bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, + BTREE_ITER_slots|BTREE_ITER_intent); + int ret = bkey_err(k); if (unlikely(ret)) return ret; @@ -852,24 +828,22 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, struct qc_dqblk *qdq) { struct bch_fs *c = sb->s_fs_info; - struct bkey_i_quota new_quota; - int ret; if (0) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); qc_dqblk_to_text(&buf, qdq); pr_info("setting:\n%s", buf.buf); - printbuf_exit(&buf); } if (sb->s_flags & SB_RDONLY) return -EROFS; + struct bkey_i_quota new_quota; bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - ret = bch2_trans_commit_do(c, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, 0, bch2_set_quota_trans(trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 73b463c94966..32fa7cf90b63 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -235,24 +235,19 @@ static const char * const bch2_rebalance_state_strs[] = { int bch2_set_rebalance_needs_scan_trans(struct 
btree_trans *trans, u64 inum) { struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_cookie *cookie; - u64 v; - int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(trans, &iter); - ret = bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter); + int ret = bkey_err(k); if (ret) goto err; - v = k.k->type == KEY_TYPE_cookie + u64 v = k.k->type == KEY_TYPE_cookie ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) : 0; - cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); + struct bkey_i_cookie *cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); ret = PTR_ERR_OR_ZERO(cookie); if (ret) goto err; @@ -269,8 +264,8 @@ err: int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - int ret = bch2_trans_commit_do(c, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); bch2_rebalance_wakeup(c); return ret; @@ -284,19 +279,15 @@ int bch2_set_fs_needs_rebalance(struct bch_fs *c) static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) { struct btree_iter iter; - struct bkey_s_c k; - u64 v; - int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(trans, &iter); - ret = bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter); + int ret = bkey_err(k); if (ret) goto err; - v = k.k->type == KEY_TYPE_cookie + u64 v = k.k->type == KEY_TYPE_cookie ? 
le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) : 0; @@ -373,7 +364,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, } if (trace_rebalance_extent_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); @@ -399,7 +390,6 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, } trace_rebalance_extent(c, buf.buf); - printbuf_exit(&buf); } return k; @@ -713,17 +703,15 @@ void bch2_rebalance_stop(struct bch_fs *c) int bch2_rebalance_start(struct bch_fs *c) { - struct task_struct *p; - int ret; - if (c->rebalance.thread) return 0; if (c->opts.nochanges) return 0; - p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); - ret = PTR_ERR_OR_ZERO(p); + struct task_struct *p = + kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); + int ret = PTR_ERR_OR_ZERO(p); bch_err_msg(c, ret, "creating rebalance thread"); if (ret) return ret; @@ -779,7 +767,7 @@ static int check_rebalance_work_one(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_s_c extent_k, rebalance_k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?: bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter)); @@ -833,7 +821,7 @@ static int check_rebalance_work_one(struct btree_trans *trans, ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, false); if (ret) - goto err; + return ret; } if (fsck_err_on(should_have_rebalance && !have_rebalance, @@ -842,22 +830,20 @@ static int check_rebalance_work_one(struct btree_trans *trans, ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, true); if (ret) - goto err; + return ret; } if (cmp <= 0) bch2_btree_iter_advance(trans, extent_iter); if (cmp >= 0) bch2_btree_iter_advance(trans, rebalance_iter); -err: fsck_err: - printbuf_exit(&buf); return ret; } int 
bch2_check_rebalance_work(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter rebalance_iter, extent_iter; int ret = 0; @@ -884,6 +870,5 @@ int bch2_check_rebalance_work(struct bch_fs *c) bch2_bkey_buf_exit(&last_flushed, c); bch2_trans_iter_exit(trans, &extent_iter); bch2_trans_iter_exit(trans, &rebalance_iter); - bch2_trans_put(trans); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0def4ecb7f88..58c159e5f10d 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -37,80 +37,79 @@ int bch2_btree_lost_data(struct bch_fs *c, struct printbuf *msg, enum btree_id btree) { - u64 b = BIT_ULL(btree); int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + bool write_sb = false; struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - if (!(c->sb.btrees_lost_data & b)) { + if (!(c->sb.btrees_lost_data & BIT_ULL(btree))) { prt_printf(msg, "flagging btree "); bch2_btree_id_to_text(msg, btree); prt_printf(msg, " lost data\n"); - ext->btrees_lost_data |= cpu_to_le64(b); + write_sb |= !__test_and_set_bit_le64(btree, &ext->btrees_lost_data); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; /* Btree node accounting will be off: */ - __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right 
away, * but in debug mode we want the next fsck run to be clean: */ - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; - - __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); - __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; + + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret; - ret = 
__bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0, &write_sb) ?: ret; goto out; case BTREE_ID_need_discard: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_freespace: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_lru: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; goto out; case BTREE_ID_accounting: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0, &write_sb) ?: ret; goto out; case BTREE_ID_snapshots: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0, &write_sb) ?: ret; + ret = 
__bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; goto out; default: - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0, &write_sb) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0, &write_sb) ?: ret; goto out; } out: - bch2_write_super(c); - mutex_unlock(&c->sb_lock); - + if (write_sb) + bch2_write_super(c); return ret; } @@ -123,7 +122,7 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) /* for -o reconstruct_alloc: */ void bch2_reconstruct_alloc(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); @@ -167,7 +166,6 @@ void bch2_reconstruct_alloc(struct bch_fs *c) c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); for (unsigned i = 0; i < btree_id_nr_alive(c); i++) if (btree_id_is_alloc(i)) @@ -339,14 +337,15 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } +DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) + int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - DARRAY(struct journal_key *) keys_sorted = { 0 }; + CLASS(darray_journal_keys, keys_sorted)(); struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; - struct btree_trans *trans = NULL; bool immediate_flush = 
false; int ret = 0; @@ -354,13 +353,13 @@ int bch2_journal_replay(struct bch_fs *c) ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", keys->nr, start_seq, end_seq); if (ret) - goto err; + return ret; } BUG_ON(!atomic_read(&keys->ref)); move_gap(keys, keys->nr); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); /* * Replay accounting keys first: we can't allow the write buffer to @@ -380,7 +379,7 @@ int bch2_journal_replay(struct bch_fs *c) BCH_WATERMARK_reclaim, bch2_journal_replay_accounting_key(trans, k)); if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret))) - goto err; + return ret; k->overwritten = true; } @@ -414,7 +413,7 @@ int bch2_journal_replay(struct bch_fs *c) if (ret) { ret = darray_push(&keys_sorted, k); if (ret) - goto err; + return ret; } } @@ -445,22 +444,16 @@ int bch2_journal_replay(struct bch_fs *c) : 0), bch2_journal_replay_key(trans, k)); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); - printbuf_exit(&buf); - goto err; + return ret; } BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); } - /* - * We need to put our btree_trans before calling flush_all_pins(), since - * that will use a btree_trans internally - */ - bch2_trans_put(trans); - trans = NULL; + bch2_trans_unlock_long(trans); if (!c->opts.retain_recovery_info && c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay) @@ -479,12 +472,7 @@ int bch2_journal_replay(struct bch_fs *c) if (keys->nr) bch2_journal_log_msg(c, "journal replay finished"); -err: - if (trans) - bch2_trans_put(trans); - darray_exit(&keys_sorted); - bch_err_fn(c, ret); - return ret; + return 0; } /* journal replay early: */ @@ -596,7 +584,7 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; + 
CLASS(printbuf, buf)(); int ret = 0; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { @@ -632,7 +620,6 @@ static int read_btree_roots(struct bch_fs *c) } } fsck_err: - printbuf_exit(&buf); return ret; } @@ -666,7 +653,7 @@ static bool check_version_upgrade(struct bch_fs *c) } if (new_version > old_version) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (old_version < bcachefs_metadata_required_upgrade_below) prt_str(&buf, "Version upgrade required:\n"); @@ -699,14 +686,12 @@ static bool check_version_upgrade(struct bch_fs *c) } bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); - ret = true; } if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Now allowing incompatible features up to "); bch2_version_to_text(&buf, new_version); @@ -715,8 +700,6 @@ static bool check_version_upgrade(struct bch_fs *c) prt_newline(&buf); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); - ret = true; } @@ -796,15 +779,14 @@ int bch2_fs_recovery(struct bch_fs *c) u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); if (sb_passes) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "superblock requires following recovery passes to be run:\n "); prt_bitflags(&buf, bch2_recovery_passes, sb_passes); bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); } if (bch2_check_version_downgrade(c)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Version downgrade required:"); @@ -820,7 +802,6 @@ int bch2_fs_recovery(struct bch_fs *c) } bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); write_sb = true; } @@ -937,11 +918,10 @@ use_clean: if (ret) goto err; - ret = bch2_fs_resize_on_mount(c); - if (ret) { - up_write(&c->state_lock); + scoped_guard(rwsem_write, &c->state_lock) + ret = bch2_fs_resize_on_mount(c); + if (ret) goto err; - } if 
(c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { bch_info(c, "filesystem is an unresized image file, mounting ro"); @@ -1119,10 +1099,9 @@ use_clean: bch2_move_stats_init(&stats, "recovery"); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_version_to_text(&buf, c->sb.version_min); bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); - printbuf_exit(&buf); ret = bch2_fs_read_write_early(c) ?: bch2_scan_old_btree_nodes(c, &stats); @@ -1150,14 +1129,13 @@ final_out: err: fsck_err: { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } goto final_out; } @@ -1167,40 +1145,54 @@ int bch2_fs_initialize(struct bch_fs *c) struct bch_inode_unpacked root_inode, lostfound_inode; struct bkey_inode_buf packed_inode; struct qstr lostfound = QSTR("lost+found"); - struct bch_member *m; int ret; bch_notice(c, "initializing new filesystem"); set_bit(BCH_FS_new_fs, &c->flags); - mutex_lock(&c->sb_lock); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - - bch2_check_version_downgrade(c); + scoped_guard(mutex, &c->sb_lock) { + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { - bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); - bch2_write_super(c); - } + bch2_check_version_downgrade(c); - for_each_member_device(c, ca) { - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); - } + 
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { + bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + bch2_write_super(c); + } - bch2_sb_members_to_cpu(c); + for_each_member_device(c, ca) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); + } - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bch2_write_super(c); + } set_bit(BCH_FS_btree_running, &c->flags); - set_bit(BCH_FS_may_go_rw, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc_fake(c, i, 0); + for_each_member_device(c, ca) { + ret = bch2_dev_usage_init(ca, false); + if (ret) { + bch2_dev_put(ca); + goto err; + } + } + + /* + * Write out the superblock and journal buckets, now that we can do + * btree updates + */ + bch_verbose(c, "marking superblocks"); + ret = bch2_trans_mark_dev_sbs(c); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto err; + ret = bch2_fs_journal_alloc(c); if (ret) goto err; @@ -1213,28 +1205,12 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; + set_bit(BCH_FS_may_go_rw, &c->flags); ret = bch2_fs_read_write_early(c); if (ret) goto err; - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); - - for_each_member_device(c, ca) { - ret = bch2_dev_usage_init(ca, false); - if (ret) { - bch2_dev_put(ca); - goto err; - } - } - - /* - * Write out the superblock and journal buckets, now that we can do - * btree updates - */ - bch_verbose(c, "marking superblocks"); - ret = bch2_trans_mark_dev_sbs(c); - bch_err_msg(c, ret, "marking superblocks"); + ret = bch2_journal_replay(c); if (ret) goto err; @@ -1292,12 +1268,11 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - mutex_lock(&c->sb_lock); - SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); - SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - - 
bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + bch2_write_super(c); + } c->recovery.curr_pass = BCH_RECOVERY_PASS_NR; return 0; diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index c09ed2dd4639..b2cdd111fd0e 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -237,19 +237,21 @@ static int bch2_lookup_root_inode(struct bch_fs *c) subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM; struct bch_inode_unpacked inode_u; struct bch_subvolume subvol; + CLASS(btree_trans, trans)(c); - return bch2_trans_do(c, + return lockrestart_do(trans, bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); } struct recovery_pass_fn { int (*fn)(struct bch_fs *); + const char *name; unsigned when; }; static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .name = #_fn, .when = _when }, BCH_RECOVERY_PASSES() #undef x }; @@ -338,7 +340,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c, int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct printbuf *out, enum bch_recovery_pass pass, - enum bch_run_recovery_pass_flags flags) + enum bch_run_recovery_pass_flags flags, + bool *write_sb) { struct bch_fs_recovery *r = &c->recovery; int ret = 0; @@ -346,13 +349,11 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); - out->atomic++; - - unsigned long lockflags; - spin_lock_irqsave(&r->lock, lockflags); + guard(printbuf_atomic)(out); + guard(spinlock_irq)(&r->lock); if (!recovery_pass_needs_set(c, pass, &flags)) - goto out; + return 0; bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool rewind = in_recovery && @@ -360,17 +361,17 @@ int 
__bch2_run_explicit_recovery_pass(struct bch_fs *c, !(r->passes_complete & BIT_ULL(pass)); bool ratelimit = flags & RUN_RECOVERY_PASS_ratelimit; - if (!(in_recovery && (flags & RUN_RECOVERY_PASS_nopersistent))) { + if (!(flags & RUN_RECOVERY_PASS_nopersistent)) { struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + *write_sb |= !__test_and_set_bit_le64(bch2_recovery_pass_to_stable(pass), + ext->recovery_passes_required); } if (pass < BCH_RECOVERY_PASS_set_may_go_rw && (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) { prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); - ret = bch_err_throw(c, cannot_rewind_recovery); - goto out; + return bch_err_throw(c, cannot_rewind_recovery); } if (ratelimit) @@ -400,9 +401,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, if (p->when & PASS_ONLINE) bch2_run_async_recovery_passes(c); } -out: - spin_unlock_irqrestore(&r->lock, lockflags); - --out->atomic; + return ret; } @@ -411,14 +410,19 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass, enum bch_run_recovery_pass_flags flags) { - int ret = 0; + /* + * With RUN_RECOVERY_PASS_ratelimit, recovery_pass_needs_set needs + * sb_lock + */ + if (!(flags & RUN_RECOVERY_PASS_ratelimit) && + !recovery_pass_needs_set(c, pass, &flags)) + return 0; - if (recovery_pass_needs_set(c, pass, &flags)) { - guard(mutex)(&c->sb_lock); - ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags); + guard(mutex)(&c->sb_lock); + bool write_sb = false; + int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb); + if (write_sb) bch2_write_super(c); - } - return ret; } @@ -441,14 +445,13 @@ int bch2_require_recovery_pass(struct bch_fs *c, return 0; enum bch_run_recovery_pass_flags flags = 0; - int ret = 0; - if (recovery_pass_needs_set(c, pass, &flags)) { - ret = 
__bch2_run_explicit_recovery_pass(c, out, pass, flags); + bool write_sb = false; + int ret = __bch2_run_explicit_recovery_pass(c, out, pass, flags, &write_sb) ?: + bch_err_throw(c, recovery_pass_will_run); + if (write_sb) bch2_write_super(c); - } - - return ret ?: bch_err_throw(c, recovery_pass_will_run); + return ret; } int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) @@ -458,16 +461,16 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pa if (!recovery_pass_needs_set(c, pass, &flags)) return 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + bool write_sb = false; int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass, - RUN_RECOVERY_PASS_nopersistent); - mutex_unlock(&c->sb_lock); + RUN_RECOVERY_PASS_nopersistent, + &write_sb); bch2_print_str(c, KERN_NOTICE, buf.buf); - printbuf_exit(&buf); return ret; } @@ -486,6 +489,7 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) r->passes_to_run &= ~BIT_ULL(pass); if (ret) { + bch_err(c, "%s(): error %s", p->name, bch2_err_str(ret)); r->passes_failing |= BIT_ULL(pass); return ret; } diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 2117f0ce1922..4f2c2f811d5e 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -30,7 +30,8 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pas int __bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, enum bch_recovery_pass, - enum bch_run_recovery_pass_flags); + enum bch_run_recovery_pass_flags, + bool *); int bch2_run_explicit_recovery_pass(struct bch_fs *, struct printbuf *, enum bch_recovery_pass, enum bch_run_recovery_pass_flags); diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 8d8e045b6bd5..60abd89d7c9f 100644 --- a/fs/bcachefs/reflink.c +++ 
b/fs/bcachefs/reflink.c @@ -183,7 +183,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; BUG_ON(missing_start < refd_start); @@ -195,7 +195,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, prt_printf(&buf, "pointer to missing indirect extent in "); ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos); if (ret) - goto err; + return ret; prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9); bch2_bkey_val_to_text(&buf, c, p.s_c); @@ -207,7 +207,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto err; + return ret; /* * Is the missing range not actually needed? 
@@ -238,15 +238,13 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); if (ret) - goto err; + return ret; if (should_commit) ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: bch_err_throw(c, transaction_restart_nested); } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -301,7 +299,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); struct btree_iter iter; @@ -360,7 +358,6 @@ next: err: fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -374,7 +371,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); s64 ret = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (r_idx >= c->reflink_gc_nr) goto not_found; @@ -394,12 +391,10 @@ not_found: if (flags & BTREE_TRIGGER_check_repair) { ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); if (ret) - goto err; + return ret; } *idx = next_idx; -err: - printbuf_exit(&buf); return ret; } @@ -498,20 +493,15 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bool reflink_p_may_update_opts_field) { struct bch_fs *c = trans->c; - struct btree_iter reflink_iter = {}; - struct bkey_s_c k; - struct bkey_i *r_v; - struct bkey_i_reflink_p *r_p; - __le64 *refcount; - int ret; if (orig->k.type == KEY_TYPE_inline_data) bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); + struct btree_iter reflink_iter; bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, BTREE_ITER_intent); - k = bch2_btree_iter_peek_prev(trans, &reflink_iter); - ret 
= bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, &reflink_iter); + int ret = bkey_err(k); if (ret) goto err; @@ -523,7 +513,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) return -ENOSPC; - r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); + struct bkey_i *r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); if (ret) goto err; @@ -536,7 +526,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); - refcount = bkey_refcount(bkey_i_to_s(r_v)); + __le64 *refcount = bkey_refcount(bkey_i_to_s(r_v)); *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); @@ -549,7 +539,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, * so we know it will be big enough: */ orig->k.type = KEY_TYPE_reflink_p; - r_p = bkey_i_to_reflink_p(orig); + + struct bkey_i_reflink_p *r_p = bkey_i_to_reflink_p(orig); set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */ @@ -598,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, u64 new_i_size, s64 *i_sectors_delta, bool may_change_src_io_path_opts) { - struct btree_trans *trans; struct btree_iter dst_iter, src_iter; struct bkey_s_c src_k; struct bkey_buf new_dst, new_src; @@ -623,7 +613,7 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_bkey_buf_init(&new_dst); bch2_bkey_buf_init(&new_src); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = bch2_inum_opts_get(trans, src_inum, &opts); if (ret) @@ -761,7 +751,6 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); @@ -779,7 +768,7 @@ static 
int bch2_gc_write_reflink_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; const __le64 *refcount = bkey_refcount_c(k); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct reflink_gc *r; int ret = 0; @@ -807,7 +796,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto out; + return ret; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -815,32 +804,30 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } -out: fsck_err: - printbuf_exit(&buf); return ret; } int bch2_gc_reflink_done(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); size_t idx = 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_reflink_key(trans, &iter, k, &idx))); + bch2_gc_write_reflink_key(trans, &iter, k, &idx)); c->reflink_gc_nr = 0; return ret; } int bch2_gc_reflink_start(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); c->reflink_gc_nr = 0; - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, + int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, ({ const __le64 *refcount = bkey_refcount_c(k); @@ -858,7 +845,7 @@ int bch2_gc_reflink_start(struct bch_fs *c) r->size = k.k->size; r->refcount = 0; 0; - }))); + })); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 8383bd7fdb3f..0784283ce78c 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -286,11 +286,8 @@ bool bch2_replicas_marked_locked(struct bch_fs *c, bool bch2_replicas_marked(struct bch_fs *c, struct bch_replicas_entry_v1 *search) { - 
percpu_down_read(&c->mark_lock); - bool ret = bch2_replicas_marked_locked(c, search); - percpu_up_read(&c->mark_lock); - - return ret; + guard(percpu_read)(&c->mark_lock); + return bch2_replicas_marked_locked(c, search); } noinline @@ -305,14 +302,14 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, memset(&new_r, 0, sizeof(new_r)); memset(&new_gc, 0, sizeof(new_gc)); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (c->replicas_gc.entries && !__replicas_has_entry(&c->replicas_gc, new_entry)) { new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); if (!new_gc.entries) { ret = bch_err_throw(c, ENOMEM_cpu_replicas); - goto err; + goto out; } } @@ -320,12 +317,12 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); if (!new_r.entries) { ret = bch_err_throw(c, ENOMEM_cpu_replicas); - goto err; + goto out; } ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); if (ret) - goto err; + goto out; } if (!new_r.entries && @@ -338,22 +335,18 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, bch2_write_super(c); /* don't update in memory replicas until changes are persistent */ - percpu_down_write(&c->mark_lock); - if (new_r.entries) - swap(c->replicas, new_r); - if (new_gc.entries) - swap(new_gc, c->replicas_gc); - percpu_up_write(&c->mark_lock); + scoped_guard(percpu_write, &c->mark_lock) { + if (new_r.entries) + swap(c->replicas, new_r); + if (new_gc.entries) + swap(new_gc, c->replicas_gc); + } out: - mutex_unlock(&c->sb_lock); - kfree(new_r.entries); kfree(new_gc.entries); - return ret; -err: bch_err_msg(c, ret, "adding replicas entry"); - goto out; + return ret; } int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) @@ -371,24 +364,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { lockdep_assert_held(&c->replicas_gc_lock); - mutex_lock(&c->sb_lock); - percpu_down_write(&c->mark_lock); - - ret = ret ?: - bch2_cpu_replicas_to_sb_replicas(c, 
&c->replicas_gc); - if (!ret) - swap(c->replicas, c->replicas_gc); - - kfree(c->replicas_gc.entries); - c->replicas_gc.entries = NULL; + guard(mutex)(&c->sb_lock); + scoped_guard(percpu_write, &c->mark_lock) { + ret = ret ?: + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); + if (!ret) + swap(c->replicas, c->replicas_gc); - percpu_up_write(&c->mark_lock); + kfree(c->replicas_gc.entries); + c->replicas_gc.entries = NULL; + } if (!ret) bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return ret; } @@ -399,7 +388,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) lockdep_assert_held(&c->replicas_gc_lock); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); BUG_ON(c->replicas_gc.entries); c->replicas_gc.nr = 0; @@ -420,7 +409,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) c->replicas_gc.entry_size, GFP_KERNEL); if (!c->replicas_gc.entries) { - mutex_unlock(&c->sb_lock); bch_err(c, "error allocating c->replicas_gc"); return bch_err_throw(c, ENOMEM_replicas_gc); } @@ -432,8 +420,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) e, c->replicas_gc.entry_size); bch2_cpu_replicas_sort(&c->replicas_gc); - mutex_unlock(&c->sb_lock); - return 0; } @@ -461,55 +447,48 @@ retry: return bch_err_throw(c, ENOMEM_replicas_gc); } - mutex_lock(&c->sb_lock); - percpu_down_write(&c->mark_lock); - - if (nr != c->replicas.nr || - new.entry_size != c->replicas.entry_size) { - percpu_up_write(&c->mark_lock); - mutex_unlock(&c->sb_lock); - kfree(new.entries); - goto retry; - } - - for (unsigned i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry_v1 *e = - cpu_replicas_entry(&c->replicas, i); + guard(mutex)(&c->sb_lock); + scoped_guard(percpu_write, &c->mark_lock) { + if (nr != c->replicas.nr || + new.entry_size != c->replicas.entry_size) { + kfree(new.entries); + goto retry; + } - struct disk_accounting_pos k = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + for (unsigned i = 0; i < c->replicas.nr; i++) { + 
struct bch_replicas_entry_v1 *e = + cpu_replicas_entry(&c->replicas, i); - unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), - "embedded variable length struct"); + struct disk_accounting_pos k = { + .type = BCH_DISK_ACCOUNTING_replicas, + }; - struct bpos p = disk_accounting_pos_to_bpos(&k); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); - struct bch_accounting_mem *acc = &c->accounting; - bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &p) >= acc->k.nr; + struct bpos p = disk_accounting_pos_to_bpos(&k); - if (e->data_type == BCH_DATA_journal || !kill) - memcpy(cpu_replicas_entry(&new, new.nr++), - e, new.entry_size); - } + struct bch_accounting_mem *acc = &c->accounting; + bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &p) >= acc->k.nr; - bch2_cpu_replicas_sort(&new); + if (e->data_type == BCH_DATA_journal || !kill) + memcpy(cpu_replicas_entry(&new, new.nr++), + e, new.entry_size); + } - ret = bch2_cpu_replicas_to_sb_replicas(c, &new); + bch2_cpu_replicas_sort(&new); - if (!ret) - swap(c->replicas, new); + ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - kfree(new.entries); + if (!ret) + swap(c->replicas, new); - percpu_up_write(&c->mark_lock); + kfree(new.entries); + } if (!ret) bch2_write_super(c); - - mutex_unlock(&c->sb_lock); - return ret; } @@ -597,9 +576,8 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) bch2_cpu_replicas_sort(&new_r); - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); swap(c->replicas, new_r); - percpu_up_write(&c->mark_lock); kfree(new_r.entries); @@ -809,9 +787,8 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, unsigned flags, bool print) { struct bch_replicas_entry_v1 *e; - bool ret = true; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); for_each_cpu_replicas_entry(&c->replicas, e) { unsigned nr_online 
= 0, nr_failed = 0, dflags = 0; bool metadata = e->data_type < BCH_DATA_user; @@ -847,21 +824,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, if (dflags & ~flags) { if (print) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_replicas_entry_to_text(&buf, e); bch_err(c, "insufficient devices online (%u) for replicas entry %s", nr_online, buf.buf); - printbuf_exit(&buf); } - ret = false; - break; + return false; } } - percpu_up_read(&c->mark_lock); - return ret; + return true; } unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) @@ -904,11 +878,8 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) { - mutex_lock(&c->sb_lock); - unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); - mutex_unlock(&c->sb_lock); - - return ret; + guard(mutex)(&c->sb_lock); + return bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); } void bch2_fs_replicas_exit(struct bch_fs *c) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 59c8770e4a0e..a5916984565e 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -89,8 +89,8 @@ int bch2_verify_superblock_clean(struct bch_fs *c, { unsigned i; struct bch_sb_field_clean *clean = *cleanp; - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, @@ -140,8 +140,6 @@ int bch2_verify_superblock_clean(struct bch_fs *c, l2, buf2.buf); } fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -150,7 +148,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) struct bch_sb_field_clean *clean, *sb_clean; int ret; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); if (fsck_err_on(!sb_clean, c, @@ -158,29 +156,22 @@ struct bch_sb_field_clean 
*bch2_read_superblock_clean(struct bch_fs *c) "superblock marked clean but clean section not present")) { SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->sb.clean = false; - mutex_unlock(&c->sb_lock); return ERR_PTR(-BCH_ERR_invalid_sb_clean); } clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), GFP_KERNEL); - if (!clean) { - mutex_unlock(&c->sb_lock); + if (!clean) return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean); - } ret = bch2_sb_clean_validate_late(c, clean, READ); if (ret) { kfree(clean); - mutex_unlock(&c->sb_lock); return ERR_PTR(ret); } - mutex_unlock(&c->sb_lock); - return clean; fsck_err: - mutex_unlock(&c->sb_lock); return ERR_PTR(ret); } @@ -265,21 +256,16 @@ const struct bch_sb_field_ops bch_sb_field_ops_clean = { int bch2_fs_mark_dirty(struct bch_fs *c) { - int ret; - /* * Unconditionally write superblock, to verify it hasn't changed before * we go rw: */ - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); - ret = bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - return ret; + return bch2_write_super(c); } void bch2_fs_mark_clean(struct bch_fs *c) @@ -289,9 +275,9 @@ void bch2_fs_mark_clean(struct bch_fs *c) unsigned u64s; int ret; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb)) - goto out; + return; SET_BCH_SB_CLEAN(c->disk_sb.sb, true); @@ -305,7 +291,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s); if (!sb_clean) { bch_err(c, "error resizing superblock while setting filesystem clean"); - goto out; + return; } sb_clean->flags = 0; @@ -329,12 +315,10 @@ void bch2_fs_mark_clean(struct bch_fs *c) ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE); if (ret) { bch_err(c, "error writing marking filesystem clean: validate error"); - goto out; + return; } bch2_journal_pos_from_member_info_set(c); bch2_write_super(c); -out: - 
mutex_unlock(&c->sb_lock); } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index b868702a431a..2e3a56bfd085 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -12,6 +12,12 @@ enum counters_flags { x(io_read_inline, 80, TYPE_SECTORS) \ x(io_read_hole, 81, TYPE_SECTORS) \ x(io_read_promote, 30, TYPE_COUNTER) \ + x(io_read_nopromote, 85, TYPE_COUNTER) \ + x(io_read_nopromote_may_not, 86, TYPE_COUNTER) \ + x(io_read_nopromote_already_promoted, 87, TYPE_COUNTER) \ + x(io_read_nopromote_unwritten, 88, TYPE_COUNTER) \ + x(io_read_nopromote_congested, 89, TYPE_COUNTER) \ + x(io_read_nopromote_in_flight, 90, TYPE_COUNTER) \ x(io_read_bounce, 31, TYPE_COUNTER) \ x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 1506d05e0665..de56a1ee79db 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -191,7 +191,7 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) bool write_sb = false; int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && @@ -205,7 +205,6 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) if (write_sb) bch2_write_super(c); - mutex_unlock(&c->sb_lock); return ret < 0 ? 
ret : 0; } @@ -372,7 +371,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) if (!test_bit(BCH_FS_btree_running, &c->flags)) return 0; - darray_char table = {}; + CLASS(darray_char, table)(); int ret = 0; for (const struct upgrade_downgrade_entry *src = downgrade_table; @@ -389,7 +388,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) ret = darray_make_room(&table, bytes); if (ret) - goto out; + return ret; dst = (void *) &darray_top(table); dst->version = cpu_to_le16(src->version); @@ -401,7 +400,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) ret = downgrade_table_extra(c, &table); if (ret) - goto out; + return ret; if (!dst->recovery_passes[0] && !dst->recovery_passes[1] && @@ -416,18 +415,14 @@ int bch2_sb_downgrade_update(struct bch_fs *c) unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); if (d && le32_to_cpu(d->field.u64s) > sb_u64s) - goto out; + return 0; d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); - if (!d) { - ret = bch_err_throw(c, ENOSPC_sb_downgrade); - goto out; - } + if (!d) + return bch_err_throw(c, ENOSPC_sb_downgrade); memcpy(d->entries, table.data, table.nr); memset_u64s_tail(d->entries, 0, table.nr); -out: - darray_exit(&table); return ret; } diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index 48853efdc105..41a259eab4fb 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -110,75 +110,66 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) }; unsigned i; - mutex_lock(&c->fsck_error_counts_lock); + guard(mutex)(&c->fsck_error_counts_lock); + for (i = 0; i < e->nr; i++) { if (err == e->data[i].id) { e->data[i].nr++; e->data[i].last_error_time = n.last_error_time; - goto out; + return; } if (err < e->data[i].id) break; } if (darray_make_room(e, 1)) - goto out; + return; darray_insert_item(e, i, n); -out: - mutex_unlock(&c->fsck_error_counts_lock); } void bch2_sb_errors_from_cpu(struct bch_fs *c) { - bch_sb_errors_cpu *src = &c->fsck_error_counts; - struct 
bch_sb_field_errors *dst; - unsigned i; - - mutex_lock(&c->fsck_error_counts_lock); - - dst = bch2_sb_field_resize(&c->disk_sb, errors, - bch2_sb_field_errors_u64s(src->nr)); + guard(mutex)(&c->fsck_error_counts_lock); + bch_sb_errors_cpu *src = &c->fsck_error_counts; + struct bch_sb_field_errors *dst = + bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); if (!dst) - goto err; + return; - for (i = 0; i < src->nr; i++) { + for (unsigned i = 0; i < src->nr; i++) { SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); } - -err: - mutex_unlock(&c->fsck_error_counts_lock); } static int bch2_sb_errors_to_cpu(struct bch_fs *c) { + guard(mutex)(&c->fsck_error_counts_lock); + struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); bch_sb_errors_cpu *dst = &c->fsck_error_counts; - unsigned i, nr = bch2_sb_field_errors_nr_entries(src); - int ret; + unsigned nr = bch2_sb_field_errors_nr_entries(src); if (!nr) return 0; - mutex_lock(&c->fsck_error_counts_lock); - ret = darray_make_room(dst, nr); + int ret = darray_make_room(dst, nr); if (ret) - goto err; + return ret; dst->nr = nr; - for (i = 0; i < nr; i++) { + for (unsigned i = 0; i < nr; i++) { dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); } -err: - mutex_unlock(&c->fsck_error_counts_lock); - return ret; + return 0; } void bch2_fs_sb_errors_exit(struct bch_fs *c) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index f2abe92ca130..0573c7b00151 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -12,7 +12,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); 
bch2_log_msg_start(c, &buf); bool removed = test_bit(dev, c->devs_removed.d); @@ -20,6 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) prt_printf(&buf, "pointer to %s device %u in key\n", removed ? "removed" : "nonexistent", dev); bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); bool print = removed ? bch2_count_fsck_err(c, ptr_to_removed_device, &buf) @@ -30,7 +31,6 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return ret; } @@ -441,9 +441,8 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) struct bch_fs *c = ca->fs; struct bch_member m; - mutex_lock(&ca->fs->sb_lock); - m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); - mutex_unlock(&ca->fs->sb_lock); + scoped_guard(mutex, &ca->fs->sb_lock) + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); printbuf_tabstop_push(out, 12); @@ -470,16 +469,15 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) void bch2_dev_errors_reset(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - struct bch_member *m; - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + guard(mutex)(&c->sb_lock); + + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -611,7 +609,7 @@ have_slot: void bch2_sb_members_clean_deleted(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bool write_sb = false; for (unsigned i = 0; i < c->sb.nr_devices; i++) { @@ -625,5 +623,4 @@ void bch2_sb_members_clean_deleted(struct bch_fs *c) if (write_sb) bch2_write_super(c); - mutex_unlock(&c->sb_lock); } diff --git a/fs/bcachefs/sb-members.h 
b/fs/bcachefs/sb-members.h index 0d363a1cdd47..35d4ab9b6197 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -133,7 +133,7 @@ static inline void __bch2_dev_put(struct bch_dev *ca) static inline void bch2_dev_put(struct bch_dev *ca) { - if (ca) + if (!IS_ERR_OR_NULL(ca)) __bch2_dev_put(ca); } diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 538c324f4765..08083d6ca8bc 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -152,16 +152,16 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, * here. */ if (type == SIX_LOCK_read && lock->readers) { - preempt_disable(); - this_cpu_inc(*lock->readers); /* signal that we own lock */ + scoped_guard(preempt) { + this_cpu_inc(*lock->readers); /* signal that we own lock */ - smp_mb(); + smp_mb(); - old = atomic_read(&lock->state); - ret = !(old & l[type].lock_fail); + old = atomic_read(&lock->state); + ret = !(old & l[type].lock_fail); - this_cpu_sub(*lock->readers, !ret); - preempt_enable(); + this_cpu_sub(*lock->readers, !ret); + } if (!ret) { smp_mb(); @@ -360,7 +360,7 @@ static inline bool six_optimistic_spin(struct six_lock *lock, if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN) return false; - preempt_disable(); + guard(preempt)(); end_time = sched_clock() + 10 * NSEC_PER_USEC; while (!need_resched() && six_owner_running(lock)) { @@ -369,10 +369,8 @@ static inline bool six_optimistic_spin(struct six_lock *lock, * wait->lock_acquired: pairs with the smp_store_release in * __six_lock_wakeup */ - if (smp_load_acquire(&wait->lock_acquired)) { - preempt_enable(); + if (smp_load_acquire(&wait->lock_acquired)) return true; - } if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { six_set_bitmask(lock, SIX_LOCK_NOSPIN); @@ -388,7 +386,6 @@ static inline bool six_optimistic_spin(struct six_lock *lock, cpu_relax(); } - preempt_enable(); return false; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4c43d2a2c1f5..7a801513b134 100644 --- 
a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -284,12 +284,10 @@ fsck_err: static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) { - mutex_lock(&c->snapshot_table_lock); - int ret = snapshot_t_mut(c, id) + guard(mutex)(&c->snapshot_table_lock); + return snapshot_t_mut(c, id) ? 0 : bch_err_throw(c, ENOMEM_mark_snapshot); - mutex_unlock(&c->snapshot_table_lock); - return ret; } static int __bch2_mark_snapshot(struct btree_trans *trans, @@ -300,15 +298,12 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, struct bch_fs *c = trans->c; struct snapshot_t *t; u32 id = new.k->p.offset; - int ret = 0; - mutex_lock(&c->snapshot_table_lock); + guard(mutex)(&c->snapshot_table_lock); t = snapshot_t_mut(c, id); - if (!t) { - ret = bch_err_throw(c, ENOMEM_mark_snapshot); - goto err; - } + if (!t) + return bch_err_throw(c, ENOMEM_mark_snapshot); if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); @@ -348,9 +343,8 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, } else { memset(t, 0, sizeof(*t)); } -err: - mutex_unlock(&c->snapshot_table_lock); - return ret; + + return 0; } int bch2_mark_snapshot(struct btree_trans *trans, @@ -481,7 +475,7 @@ static int check_snapshot_tree(struct btree_trans *trans, struct bkey_s_c_snapshot_tree st; struct bch_snapshot s; struct bch_subvolume subvol; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter snapshot_iter = {}; u32 root_id; int ret; @@ -567,7 +561,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &snapshot_iter); - printbuf_exit(&buf); return ret; } @@ -580,14 +573,12 @@ fsck_err: */ int bch2_check_snapshot_trees(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot_tree(trans, &iter, k))); - 
bch_err_fn(c, ret); - return ret; + check_snapshot_tree(trans, &iter, k)); } /* @@ -706,7 +697,7 @@ static int check_snapshot(struct btree_trans *trans, struct bkey_i_snapshot *u; u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 real_depth; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); u32 i, id; int ret = 0; @@ -839,7 +830,6 @@ static int check_snapshot(struct btree_trans *trans, ret = 0; err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -849,14 +839,12 @@ int bch2_check_snapshots(struct bch_fs *c) * We iterate backwards as checking/fixing the depth field requires that * the parent's depth already be correct: */ - int ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_snapshot(trans, &iter, k)); } static int check_snapshot_exists(struct btree_trans *trans, u32 id) @@ -980,8 +968,8 @@ static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct int bch2_reconstruct_snapshots(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); struct snapshot_tree_reconstruct r = {}; int ret = 0; @@ -1023,10 +1011,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) } fsck_err: err: - bch2_trans_put(trans); snapshot_tree_reconstruct_exit(&r); - printbuf_exit(&buf); - bch_err_fn(c, ret); return ret; } @@ -1035,7 +1020,7 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot); @@ -1083,7 +1068,6 @@ int __bch2_check_key_has_snapshot(struct 
btree_trans *trans, } } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1693,7 +1677,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s if (BCH_SNAPSHOT_DELETED(s.v)) return 0; - mutex_lock(&d->progress_lock); + guard(mutex)(&d->progress_lock); for (unsigned i = 0; i < 2; i++) { u32 child = le32_to_cpu(s.v->children[i]); @@ -1720,7 +1704,6 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s darray_push(&d->delete_interior, n); } } - mutex_unlock(&d->progress_lock); return ret; } @@ -1825,10 +1808,12 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) if (!mutex_trylock(&d->lock)) return 0; - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) - goto out_unlock; + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { + mutex_unlock(&d->lock); + return 0; + } - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); /* * For every snapshot node: If we have no live children and it's not @@ -1848,11 +1833,10 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) goto err; { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_snapshot_delete_nodes_to_text(&buf, d); ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); - printbuf_exit(&buf); if (ret) goto err; } @@ -1895,19 +1879,16 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } err: - mutex_lock(&d->progress_lock); - darray_exit(&d->deleting_from_trees); - darray_exit(&d->delete_interior); - darray_exit(&d->delete_leaves); - d->running = false; - mutex_unlock(&d->progress_lock); - bch2_trans_put(trans); + scoped_guard(mutex, &d->progress_lock) { + darray_exit(&d->deleting_from_trees); + darray_exit(&d->delete_interior); + darray_exit(&d->delete_leaves); + d->running = false; + } bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots); -out_unlock: + mutex_unlock(&d->lock); - if (!bch2_err_matches(ret, EROFS)) - 
bch_err_fn(c, ret); return ret; } @@ -1952,11 +1933,10 @@ void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c) return; } - mutex_lock(&d->progress_lock); - bch2_snapshot_delete_nodes_to_text(out, d); - - bch2_bbpos_to_text(out, d->pos); - mutex_unlock(&d->progress_lock); + scoped_guard(mutex, &d->progress_lock) { + bch2_snapshot_delete_nodes_to_text(out, d); + bch2_bbpos_to_text(out, d->pos); + } } int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, @@ -2010,11 +1990,11 @@ int bch2_snapshots_read(struct bch_fs *c) * Initializing the is_ancestor bitmaps requires ancestors to already be * initialized - so mark in reverse: */ - int ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, POS_MAX, 0, k, __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_check_snapshot_needs_deletion(trans, k))); + bch2_check_snapshot_needs_deletion(trans, k)); bch_err_fn(c, ret); /* diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h index 0ab698f13e5c..a826c9c83c11 100644 --- a/fs/bcachefs/snapshot_types.h +++ b/fs/bcachefs/snapshot_types.h @@ -6,7 +6,7 @@ #include "darray.h" #include "subvolume_types.h" -typedef DARRAY(u32) snapshot_id_list; +DEFINE_DARRAY_NAMED(snapshot_id_list, u32); #define IS_ANCESTOR_BITMAP 128 diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index d39fd4261e1b..3e08e55d2dc1 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -125,7 +125,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool need_commit = false; int ret = 0; @@ -183,7 +183,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, goto err; if (!need_commit) { - struct printbuf buf = PRINTBUF; + 
printbuf_reset(&buf); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", @@ -198,7 +198,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); #endif bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, fsck_repair_unimplemented); goto err; } @@ -207,7 +206,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, bch_err_throw(c, transaction_restart_nested); err: fsck_err: - printbuf_exit(&buf); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -244,7 +242,7 @@ int bch2_str_hash_repair_key(struct btree_trans *trans, bool *updated_before_k_pos) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool free_snapshots_seen = false; int ret = 0; @@ -331,7 +329,6 @@ duplicate_entries: out: fsck_err: bch2_trans_iter_exit(trans, dup_iter); - printbuf_exit(&buf); if (free_snapshots_seen) darray_exit(&s->ids); return ret; @@ -346,7 +343,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter = {}; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bkey_s_c k; int ret = 0; @@ -375,9 +372,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, goto bad_hash; } bch2_trans_iter_exit(trans, &iter); -out: fsck_err: - printbuf_exit(&buf); return ret; bad_hash: bch2_trans_iter_exit(trans, &iter); @@ -386,7 +381,7 @@ bad_hash: */ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); if (ret) - goto out; + return ret; if (fsck_err(trans, hash_table_key_wrong_offset, "hash table key at wrong offset: should be at %llu\n%s", @@ -396,5 +391,5 @@ bad_hash: k_iter, hash_k, &iter, bkey_s_c_null, updated_before_k_pos); - goto out; + return ret; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 353df662a9b5..2d2d6b22df88 100644 --- 
a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -17,7 +17,7 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "missing subvolume %u", subvolid); @@ -27,7 +27,6 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) BCH_RECOVERY_PASS_check_inodes, 0); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return ret; } @@ -47,18 +46,18 @@ static int check_subvol(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct bkey_s_c_subvolume subvol; struct btree_iter subvol_children_iter = {}; + struct bch_subvolume subvol; struct bch_snapshot snapshot; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned snapid; int ret = 0; if (k.k->type != KEY_TYPE_subvolume) return 0; - subvol = bkey_s_c_to_subvolume(k); - snapid = le32_to_cpu(subvol.v->snapshot); + bkey_val_copy(&subvol, bkey_s_c_to_subvolume(k)); + snapid = le32_to_cpu(subvol.snapshot); ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) @@ -67,19 +66,19 @@ static int check_subvol(struct btree_trans *trans, if (ret) return ret; - if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + if (BCH_SUBVOLUME_UNLINKED(&subvol)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); return ret ?: bch_err_throw(c, transaction_restart_nested); } - if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL && - subvol.v->fs_path_parent, + if (fsck_err_on(k.k->p.offset == BCACHEFS_ROOT_SUBVOL && + subvol.fs_path_parent, trans, subvol_root_fs_path_parent_nonzero, "root subvolume has nonzero fs_path_parent\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { struct bkey_i_subvolume *n = - bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); + 
bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); ret = PTR_ERR_OR_ZERO(n); if (ret) goto err; @@ -87,7 +86,7 @@ static int check_subvol(struct btree_trans *trans, n->v.fs_path_parent = 0; } - if (subvol.v->fs_path_parent) { + if (subvol.fs_path_parent) { struct bpos pos = subvolume_children_pos(k); struct bkey_s_c subvol_children_k = @@ -111,16 +110,16 @@ static int check_subvol(struct btree_trans *trans, struct bch_inode_unpacked inode; ret = bch2_inode_find_by_inum_nowarn_trans(trans, - (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) }, + (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.inode) }, &inode); if (!ret) { - if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset, + if (fsck_err_on(inode.bi_subvol != k.k->p.offset, trans, subvol_root_wrong_bi_subvol, "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", inode.bi_inum, inode.bi_snapshot, - inode.bi_subvol, subvol.k->p.offset)) { - inode.bi_subvol = subvol.k->p.offset; - inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_subvol, k.k->p.offset)) { + inode.bi_subvol = k.k->p.offset; + inode.bi_snapshot = le32_to_cpu(subvol.snapshot); ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; @@ -128,8 +127,8 @@ static int check_subvol(struct btree_trans *trans, } else if (bch2_err_matches(ret, ENOENT)) { if (fsck_err(trans, subvol_to_missing_root, "subvolume %llu points to missing subvolume root %llu:%u", - k.k->p.offset, le64_to_cpu(subvol.v->inode), - le32_to_cpu(subvol.v->snapshot))) { + k.k->p.offset, le64_to_cpu(subvol.inode), + le32_to_cpu(subvol.snapshot))) { /* * Recreate - any contents that are still disconnected * will then get reattached under lost+found @@ -137,10 +136,10 @@ static int check_subvol(struct btree_trans *trans, bch2_inode_init_early(c, &inode); bch2_inode_init_late(c, &inode, bch2_current_time(c), 0, 0, S_IFDIR|0700, 0, NULL); - inode.bi_inum = le64_to_cpu(subvol.v->inode); - inode.bi_snapshot = 
le32_to_cpu(subvol.v->snapshot); + inode.bi_inum = le64_to_cpu(subvol.inode); + inode.bi_snapshot = le32_to_cpu(subvol.snapshot); inode.bi_subvol = k.k->p.offset; - inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent); + inode.bi_parent_subvol = le32_to_cpu(subvol.fs_path_parent); ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; @@ -149,8 +148,8 @@ static int check_subvol(struct btree_trans *trans, goto err; } - if (!BCH_SUBVOLUME_SNAP(subvol.v)) { - u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); + if (!BCH_SUBVOLUME_SNAP(&subvol)) { + u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.snapshot)); u32 snapshot_tree = bch2_snapshot_tree(c, snapshot_root); struct bch_snapshot_tree st; @@ -162,12 +161,12 @@ static int check_subvol(struct btree_trans *trans, if (ret) goto err; - if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + if (fsck_err_on(le32_to_cpu(st.master_subvol) != k.k->p.offset, trans, subvol_not_master_and_not_snapshot, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = - bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); + bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); ret = PTR_ERR_OR_ZERO(s); if (ret) goto err; @@ -178,19 +177,16 @@ static int check_subvol(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &subvol_children_iter); - printbuf_exit(&buf); return ret; } int bch2_check_subvols(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_subvol(trans, &iter, k)); } static int check_subvol_child(struct btree_trans *trans, @@ -219,13 +215,11 @@ fsck_err: int 
bch2_check_subvol_children(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol_child(trans, &iter, k))); - bch_err_fn(c, ret); - return 0; + check_subvol_child(trans, &iter, k)); } /* Subvolumes: */ @@ -348,7 +342,8 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) { - return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol)); + CLASS(btree_trans, trans)(c); + return lockrestart_do(trans, bch2_subvol_is_ro_trans(trans, subvol)); } int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, @@ -514,18 +509,22 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor int ret = 0; while (!ret) { - mutex_lock(&c->snapshots_unlinked_lock); - snapshot_id_list s = c->snapshots_unlinked; - darray_init(&c->snapshots_unlinked); - mutex_unlock(&c->snapshots_unlinked_lock); + snapshot_id_list s; + + scoped_guard(mutex, &c->snapshots_unlinked_lock) { + s = c->snapshots_unlinked; + darray_init(&c->snapshots_unlinked); + } if (!s.nr) break; bch2_evict_subvolume_inodes(c, &s); + CLASS(btree_trans, trans)(c); + darray_for_each(s, id) { - ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); + ret = bch2_subvolume_delete(trans, *id); bch_err_msg(c, ret, "deleting subvolume %u", *id); if (ret) break; @@ -549,10 +548,9 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans struct bch_fs *c = trans->c; int ret = 0; - mutex_lock(&c->snapshots_unlinked_lock); - if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) - ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); - mutex_unlock(&c->snapshots_unlinked_lock); + scoped_guard(mutex, &c->snapshots_unlinked_lock) + if 
(!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) + ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); if (ret) return ret; @@ -677,7 +675,6 @@ int bch2_initialize_subvolumes(struct bch_fs *c) struct bkey_i_snapshot_tree root_tree; struct bkey_i_snapshot root_snapshot; struct bkey_i_subvolume root_volume; - int ret; bkey_snapshot_tree_init(&root_tree.k_i); root_tree.k.p.offset = 1; @@ -698,11 +695,9 @@ int bch2_initialize_subvolumes(struct bch_fs *c) root_volume.v.snapshot = cpu_to_le32(U32_MAX); root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: + return bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0, 0) ?: bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0, 0); - bch_err_fn(c, ret); - return ret; } static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) @@ -739,10 +734,9 @@ err: /* set bi_subvol on root inode */ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { - int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; + CLASS(btree_trans, trans)(c); + return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_fs_upgrade_for_subvolumes(trans)); } void bch2_fs_subvolumes_init_early(struct bch_fs *c) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 85e460d10e9d..40fa87ce1d09 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -68,23 +68,21 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) { - int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && - version <= c->sb.version_incompat_allowed) - ? 
0 - : -BCH_ERR_may_not_use_incompat_feature; + guard(mutex)(&c->sb_lock); - mutex_lock(&c->sb_lock); - if (!ret) { + if (((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed)) { SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); + return 0; } else { darray_for_each(c->incompat_versions_requested, i) if (version == *i) - goto out; + return -BCH_ERR_may_not_use_incompat_feature; darray_push(&c->incompat_versions_requested, version); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "requested incompat feature "); bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); @@ -92,13 +90,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v prt_printf(&buf, "\n set version_upgrade=incompat to enable"); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); + return -BCH_ERR_may_not_use_incompat_feature; } - -out: - mutex_unlock(&c->sb_lock); - - return ret; } const char * const bch2_sb_fields[] = { @@ -203,12 +196,11 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; if (new_bytes > max_bytes) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_bdevname(&buf, sb->bdev); prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes); pr_err("%s", buf.buf); - printbuf_exit(&buf); return -BCH_ERR_ENOSPC_sb; } } @@ -783,8 +775,8 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; - struct printbuf err = PRINTBUF; - struct printbuf err2 = PRINTBUF; + CLASS(printbuf, err)(); + CLASS(printbuf, err2)(); __le64 *i; int ret; #ifndef __KERNEL__ @@ -859,7 +851,6 @@ retry: else bch2_print_opts(opts, KERN_ERR "%s", err2.buf); - printbuf_exit(&err2); printbuf_reset(&err); /* @@ -925,15 
+916,14 @@ got_super: path, err.buf); goto err_no_print; } -out: - printbuf_exit(&err); - return ret; + + return 0; err: bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n", path, err.buf); err_no_print: bch2_free_super(sb); - goto out; + return ret; } int bch2_read_super(const char *path, struct bch_opts *opts, @@ -1001,7 +991,12 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb), null_nonce(), sb); - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); + /* + * blk-wbt.c throttles all writes except those that have both REQ_SYNC + * and REQ_IDLE set... + */ + + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_IDLE|REQ_META); bio->bi_iter.bi_sector = le64_to_cpu(sb->offset); bio->bi_end_io = write_super_endio; bio->bi_private = ca; @@ -1019,7 +1014,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); unsigned sb = 0, nr_wrote; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; @@ -1101,14 +1096,13 @@ int bch2_write_super(struct bch_fs *c) goto out; if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "attempting to write superblock that wasn't version downgraded ("); bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version)); prt_str(&buf, " > "); bch2_version_to_text(&buf, bcachefs_metadata_version_current); prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, sb_not_downgraded); goto out; } @@ -1129,7 +1123,7 @@ int bch2_write_super(struct bch_fs *c) continue; if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { - struct printbuf buf = PRINTBUF; + 
CLASS(printbuf, buf)(); prt_char(&buf, ' '); prt_bdevname(&buf, ca->disk_sb.bdev); prt_printf(&buf, @@ -1144,12 +1138,10 @@ int bch2_write_super(struct bch_fs *c) } else { bch_err(c, "%s", buf.buf); } - - printbuf_exit(&buf); } if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_char(&buf, ' '); prt_bdevname(&buf, ca->disk_sb.bdev); prt_printf(&buf, @@ -1157,7 +1149,6 @@ int bch2_write_super(struct bch_fs *c) le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, erofs_sb_err); } } @@ -1219,19 +1210,17 @@ out: darray_for_each(online_devices, ca) enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super); darray_exit(&online_devices); - printbuf_exit(&err); return ret; } void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) { - mutex_lock(&c->sb_lock); - if (!(c->sb.features & (1ULL << feat))) { - c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); + guard(mutex)(&c->sb_lock); + if (!(c->sb.features & BIT_ULL(feat))) { + c->disk_sb.sb->features[0] |= cpu_to_le64(BIT_ULL(feat)); bch2_write_super(c); } - mutex_unlock(&c->sb_lock); } /* Downgrade if superblock is at a higher version than currently supported: */ @@ -1279,11 +1268,12 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) void bch2_sb_upgrade_incompat(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + if (c->sb.version == c->sb.version_incompat_allowed) - goto unlock; + return; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Now allowing incompatible features up to "); bch2_version_to_text(&buf, c->sb.version); @@ -1292,14 +1282,11 @@ void bch2_sb_upgrade_incompat(struct bch_fs *c) prt_newline(&buf); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); 
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version)); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -1365,7 +1352,7 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { unsigned type = le32_to_cpu(f->type); - struct printbuf field_err = PRINTBUF; + CLASS(printbuf, field_err)(); const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); int ret; @@ -1377,7 +1364,6 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, bch2_sb_field_to_text(err, sb, f); } - printbuf_exit(&field_err); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6980cd5b0ca8..0fc0b2221036 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -103,9 +103,32 @@ const char * const bch2_dev_write_refs[] = { }; #undef x -static void __bch2_print_str(struct bch_fs *c, const char *prefix, - const char *str) +static bool should_print_loglevel(struct bch_fs *c, const char *fmt) { + unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6; + + bool have_soh = fmt[0] == KERN_SOH[0]; + bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9'; + + unsigned loglevel = have_loglevel + ? 
fmt[1] - '0' + : c->prev_loglevel; + + if (have_loglevel) + c->prev_loglevel = loglevel; + + return loglevel <= loglevel_opt; +} + +void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) +{ + if (!should_print_loglevel(c, prefix)) + return; + +#ifndef __KERNEL__ + prefix = ""; +#endif + #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -114,12 +137,7 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix, return; } #endif - bch2_print_string_as_lines(KERN_ERR, str); -} - -void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) -{ - __bch2_print_str(c, prefix, str); + bch2_print_string_as_lines(prefix, str); } __printf(2, 0) @@ -149,6 +167,14 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) void __bch2_print(struct bch_fs *c, const char *fmt, ...) { + if (!should_print_loglevel(c, fmt)) + return; + +#ifndef __KERNEL__ + if (fmt[0] == KERN_SOH[0]) + fmt += 2; +#endif + struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); va_list args; @@ -241,14 +267,11 @@ static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) { - struct bch_fs *c; + guard(mutex)(&bch_fs_list_lock); - mutex_lock(&bch_fs_list_lock); - c = __bch2_uuid_to_fs(uuid); + struct bch_fs *c = __bch2_uuid_to_fs(uuid); if (c) closure_get(&c->cl); - mutex_unlock(&bch_fs_list_lock); - return c; } @@ -392,9 +415,8 @@ void bch2_fs_read_only(struct bch_fs *c) bch2_fs_mark_clean(c); } else { /* Make sure error counts/counters are persisted */ - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bch2_write_super(c); - mutex_unlock(&c->sb_lock); bch_verbose(c, "done going read-only, filesystem not clean"); } @@ -405,9 +427,8 @@ static void bch2_fs_read_only_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, read_only_work); - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); bch2_fs_read_only(c); - 
up_write(&c->state_lock); } static void bch2_fs_read_only_async(struct bch_fs *c) @@ -487,11 +508,15 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ret = bch2_fs_init_rw(c); if (ret) - goto err; + return ret; ret = bch2_sb_members_v2_init(c); if (ret) - goto err; + return ret; + + ret = bch2_fs_mark_dirty(c); + if (ret) + return ret; clear_bit(BCH_FS_clean_shutdown, &c->flags); @@ -510,15 +535,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ - spin_lock(&c->journal.lock); - set_bit(JOURNAL_need_flush_write, &c->journal.flags); - set_bit(JOURNAL_running, &c->journal.flags); - bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); + scoped_guard(spinlock, &c->journal.lock) { + set_bit(JOURNAL_need_flush_write, &c->journal.flags); + set_bit(JOURNAL_running, &c->journal.flags); + bch2_journal_space_available(&c->journal); + } - ret = bch2_fs_mark_dirty(c); - if (ret) - goto err; + /* + * Don't jump to our error path, and call bch2_fs_read_only(), unless we + * successfully marked the filesystem dirty + */ ret = bch2_journal_reclaim_start(&c->journal); if (ret) @@ -571,11 +597,8 @@ int bch2_fs_read_write(struct bch_fs *c) int bch2_fs_read_write_early(struct bch_fs *c) { - down_write(&c->state_lock); - int ret = __bch2_fs_read_write(c, true); - up_write(&c->state_lock); - - return ret; + guard(rwsem_write)(&c->state_lock); + return __bch2_fs_read_write(c, true); } /* Filesystem startup/shutdown: */ @@ -673,9 +696,8 @@ void __bch2_fs_stop(struct bch_fs *c) set_bit(BCH_FS_stopping, &c->flags); - down_write(&c->state_lock); - bch2_fs_read_only(c); - up_write(&c->state_lock); + scoped_guard(rwsem_write, &c->state_lock) + bch2_fs_read_only(c); for (unsigned i = 0; i < c->sb.nr_devices; i++) { struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true); @@ -711,9 +733,8 @@ 
void __bch2_fs_stop(struct bch_fs *c) void bch2_fs_free(struct bch_fs *c) { - mutex_lock(&bch_fs_list_lock); - list_del(&c->list); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) + list_del(&c->list); closure_sync(&c->cl); closure_debug_destroy(&c->cl); @@ -775,21 +796,19 @@ static int bch2_fs_online(struct bch_fs *c) return ret; } - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); for_each_member_device(c, ca) { ret = bch2_dev_sysfs_online(c, ca); if (ret) { bch_err(c, "error creating sysfs objects"); bch2_dev_put(ca); - goto err; + return ret; } } BUG_ON(!list_empty(&c->list)); list_add(&c->list, &bch_fs_list); -err: - up_write(&c->state_lock); return ret; } @@ -826,8 +845,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch_sb_handles *sbs) { struct bch_fs *c; - struct printbuf name = PRINTBUF; unsigned i, iter_size; + CLASS(printbuf, name)(); int ret = 0; c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); @@ -914,9 +933,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; - mutex_lock(&c->sb_lock); - ret = bch2_sb_to_fs(c, sb); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + ret = bch2_sb_to_fs(c, sb); if (ret) goto err; @@ -968,7 +986,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; strscpy(c->name, name.buf, sizeof(c->name)); - printbuf_exit(&name); iter_size = sizeof(struct sort_iter) + (btree_blocks(c) + 1) * 2 * @@ -1060,12 +1077,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, &c->clock_journal_res, (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2); - mutex_lock(&bch_fs_list_lock); - ret = bch2_fs_online(c); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) + ret = bch2_fs_online(c); if (ret) goto err; + + c->recovery_task = current; out: return c; err: @@ -1133,8 +1151,8 @@ static bool 
bch2_fs_may_start(struct bch_fs *c) case BCH_DEGRADED_yes: flags |= BCH_FORCE_IF_DEGRADED; break; - default: - mutex_lock(&c->sb_lock); + default: { + guard(mutex)(&c->sb_lock); for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; @@ -1143,13 +1161,11 @@ static bool bch2_fs_may_start(struct bch_fs *c) if (!bch2_dev_is_online(ca) && (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); + ca->mi.state == BCH_MEMBER_STATE_ro)) return false; - } } - mutex_unlock(&c->sb_lock); break; + } } return bch2_have_enough_devs(c, c->online_devs, flags, true); @@ -1160,6 +1176,8 @@ int bch2_fs_start(struct bch_fs *c) time64_t now = ktime_get_real_seconds(); int ret = 0; + BUG_ON(test_bit(BCH_FS_started, &c->flags)); + print_mount_opts(c); if (c->cf_encoding) @@ -1171,44 +1189,29 @@ int bch2_fs_start(struct bch_fs *c) if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); + scoped_guard(rwsem_write, &c->state_lock) { + guard(mutex)(&c->sb_lock); + if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, + sizeof(struct bch_sb_field_ext) / sizeof(u64))) { + ret = bch_err_throw(c, ENOSPC_sb); + goto err; + } - BUG_ON(test_bit(BCH_FS_started, &c->flags)); + ret = bch2_sb_members_v2_init(c); + if (ret) + goto err; - if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, - sizeof(struct bch_sb_field_ext) / sizeof(u64))) { - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); - ret = bch_err_throw(c, ENOSPC_sb); - goto err; - } + scoped_guard(rcu) + for_each_online_member_rcu(c, ca) { + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(now); + if (ca->mi.state == BCH_MEMBER_STATE_rw) + bch2_dev_allocator_add(c, ca); + } - ret = bch2_sb_members_v2_init(c); - if (ret) { - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); - goto err; + bch2_recalc_capacity(c); } - 
scoped_guard(rcu) - for_each_online_member_rcu(c, ca) - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = - cpu_to_le64(now); - - /* - * Dno't write superblock yet: recovery might have to downgrade - */ - mutex_unlock(&c->sb_lock); - - scoped_guard(rcu) - for_each_online_member_rcu(c, ca) - if (ca->mi.state == BCH_MEMBER_STATE_rw) - bch2_dev_allocator_add(c, ca); - bch2_recalc_capacity(c); - up_write(&c->state_lock); - - c->recovery_task = current; ret = BCH_SB_INITIALIZED(c->disk_sb.sb) ? bch2_fs_recovery(c) : bch2_fs_initialize(c); @@ -1229,13 +1232,12 @@ int bch2_fs_start(struct bch_fs *c) set_bit(BCH_FS_started, &c->flags); wake_up(&c->ro_ref_wait); - down_write(&c->state_lock); - if (c->opts.read_only) - bch2_fs_read_only(c); - else if (!test_bit(BCH_FS_rw, &c->flags)) - ret = bch2_fs_read_write(c); - up_write(&c->state_lock); - + scoped_guard(rwsem_write, &c->state_lock) { + if (c->opts.read_only) + bch2_fs_read_only(c); + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); + } err: if (ret) bch_err_msg(c, ret, "starting filesystem"); @@ -1280,7 +1282,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, if (fs->sb->seq == sb->sb->seq && fs->sb->write_time != sb->sb->write_time) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Split brain detected between "); prt_bdevname(&buf, sb->bdev); @@ -1305,7 +1307,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, prt_printf(&buf, "Not using older sb"); pr_err("%s", buf.buf); - printbuf_exit(&buf); if (!opts->no_splitbrain_check) return -BCH_ERR_device_splitbrain; @@ -1316,7 +1317,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, u64 seq_from_member = le64_to_cpu(sb->sb->seq); if (seq_from_fs && seq_from_fs < seq_from_member) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Split brain detected between "); prt_bdevname(&buf, sb->bdev); @@ -1338,7 +1339,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, } 
pr_err("%s", buf.buf); - printbuf_exit(&buf); if (!opts->no_splitbrain_check) return -BCH_ERR_device_splitbrain; @@ -1553,18 +1553,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) struct bch_dev *ca = NULL; if (bch2_fs_init_fault("dev_alloc")) - goto err; + return bch_err_throw(c, ENOMEM_dev_alloc); ca = __bch2_dev_alloc(c, &member); if (!ca) - goto err; + return bch_err_throw(c, ENOMEM_dev_alloc); ca->fs = c; bch2_dev_attach(c, ca, dev_idx); return 0; -err: - return bch_err_throw(c, ENOMEM_dev_alloc); } static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) @@ -1579,7 +1577,10 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (get_capacity(sb->bdev->bd_disk) < ca->mi.bucket_size * ca->mi.nbuckets) { - bch_err(ca, "cannot online: device too small"); + bch_err(ca, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)", + get_capacity(sb->bdev->bd_disk), + ca->mi.bucket_size * ca->mi.nbuckets, + ca->mi.nbuckets); return bch_err_throw(ca->fs, device_size_too_small); } @@ -1590,10 +1591,9 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (ret) return ret; - struct printbuf name = PRINTBUF; + CLASS(printbuf, name)(); prt_bdevname(&name, sb->bdev); strscpy(ca->name, name.buf, sizeof(ca->name)); - printbuf_exit(&name); /* Commit: */ ca->disk_sb = *sb; @@ -1725,7 +1725,6 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { - struct bch_member *m; int ret = 0; if (ca->mi.state == new_state) @@ -1739,11 +1738,11 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, bch_notice(ca, "%s", bch2_member_states[new_state]); - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_STATE(m, new_state); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + 
scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_STATE(m, new_state); + bch2_write_super(c); + } if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); @@ -1756,26 +1755,20 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { - int ret; - - down_write(&c->state_lock); - ret = __bch2_dev_set_state(c, ca, new_state, flags); - up_write(&c->state_lock); - - return ret; + guard(rwsem_write)(&c->state_lock); + return __bch2_dev_set_state(c, ca, new_state, flags); } /* Device add/removal: */ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { - struct bch_member *m; unsigned dev_idx = ca->dev_idx, data; bool fast_device_removal = !bch2_request_incompat_feature(c, bcachefs_metadata_version_fast_device_removal); int ret; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); /* * We consume a reference to ca->ref, regardless of whether we succeed @@ -1842,20 +1835,17 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) data = bch2_dev_has_data(c, ca); if (data) { - struct printbuf data_has = PRINTBUF; - + CLASS(printbuf, data_has)(); prt_bitflags(&data_has, __bch2_data_types, data); bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); - printbuf_exit(&data_has); ret = -EBUSY; goto err; } __bch2_dev_offline(c, ca); - mutex_lock(&c->sb_lock); - rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + rcu_assign_pointer(c->devs[ca->dev_idx], NULL); #ifndef CONFIG_BCACHEFS_DEBUG percpu_ref_kill(&ca->ref); @@ -1871,25 +1861,23 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) * Free this device's slot in the bch_member array - all pointers to * this device must be gone: */ - mutex_lock(&c->sb_lock); - m = 
bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - if (fast_device_removal) - m->uuid = BCH_SB_MEMBER_DELETED_UUID; - else - memset(&m->uuid, 0, sizeof(m->uuid)); + if (fast_device_removal) + m->uuid = BCH_SB_MEMBER_DELETED_UUID; + else + memset(&m->uuid, 0, sizeof(m->uuid)); - bch2_write_super(c); + bch2_write_super(c); + } - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); return 0; err: if (test_bit(BCH_FS_rw, &c->flags) && ca->mi.state == BCH_MEMBER_STATE_rw && !enumerated_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); - up_write(&c->state_lock); return ret; } @@ -1899,8 +1887,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb = {}; struct bch_dev *ca = NULL; - struct printbuf errbuf = PRINTBUF; - struct printbuf label = PRINTBUF; + CLASS(printbuf, label)(); int ret = 0; ret = bch2_read_super(path, &opts, &sb); @@ -1919,12 +1906,12 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } if (list_empty(&c->list)) { - mutex_lock(&bch_fs_list_lock); - if (__bch2_uuid_to_fs(c->sb.uuid)) - ret = bch_err_throw(c, filesystem_uuid_already_open); - else - list_add(&c->list, &bch_fs_list); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) { + if (__bch2_uuid_to_fs(c->sb.uuid)) + ret = bch_err_throw(c, filesystem_uuid_already_open); + else + list_add(&c->list, &bch_fs_list); + } if (ret) { bch_err(c, "filesystem UUID already open"); @@ -1946,101 +1933,95 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ret) goto err; - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); - SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); + scoped_guard(rwsem_write, &c->state_lock) { + scoped_guard(mutex, &c->sb_lock) { + SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); - ret = bch2_sb_from_fs(c, ca); - bch_err_msg(c, ret, "setting up new superblock"); - 
if (ret) - goto err_unlock; + ret = bch2_sb_from_fs(c, ca); + bch_err_msg(c, ret, "setting up new superblock"); + if (ret) + goto err; - if (dynamic_fault("bcachefs:add:no_slot")) - goto err_unlock; + if (dynamic_fault("bcachefs:add:no_slot")) + goto err; - ret = bch2_sb_member_alloc(c); - if (ret < 0) { - bch_err_msg(c, ret, "setting up new superblock"); - goto err_unlock; - } - unsigned dev_idx = ret; - ret = 0; + ret = bch2_sb_member_alloc(c); + if (ret < 0) { + bch_err_msg(c, ret, "setting up new superblock"); + goto err; + } + unsigned dev_idx = ret; + ret = 0; - /* success: */ + /* success: */ - dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); - *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; + dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); + *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; - ca->disk_sb.sb->dev_idx = dev_idx; - bch2_dev_attach(c, ca, dev_idx); + ca->disk_sb.sb->dev_idx = dev_idx; + bch2_dev_attach(c, ca, dev_idx); - if (BCH_MEMBER_GROUP(&dev_mi)) { - ret = __bch2_dev_group_set(c, ca, label.buf); - bch_err_msg(c, ret, "creating new label"); - if (ret) - goto err_unlock; - } + set_bit(ca->dev_idx, c->online_devs.d); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (BCH_MEMBER_GROUP(&dev_mi)) { + ret = __bch2_dev_group_set(c, ca, label.buf); + bch_err_msg(c, ret, "creating new label"); + if (ret) + goto err_late; + } - if (test_bit(BCH_FS_started, &c->flags)) { - ret = bch2_dev_usage_init(ca, false); - if (ret) - goto err_late; + bch2_write_super(c); + } - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); + ret = bch2_dev_usage_init(ca, false); if (ret) goto err_late; - ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) - goto err_late; + if (test_bit(BCH_FS_started, &c->flags)) { + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + bch_err_msg(ca, ret, "marking new 
superblock"); + if (ret) + goto err_late; - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); + ret = bch2_fs_freespace_init(c); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err_late; - ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) - goto err_late; - } + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); - /* - * We just changed the superblock UUID, invalidate cache and send a - * uevent to update /dev/disk/by-uuid - */ - invalidate_bdev(ca->disk_sb.bdev); + ret = bch2_dev_journal_alloc(ca, false); + bch_err_msg(c, ret, "allocating journal"); + if (ret) + goto err_late; + } - char uuid_str[37]; - snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); - char *envp[] = { - "CHANGE=uuid", - uuid_str, - NULL, - }; - kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); - up_write(&c->state_lock); + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + } out: - printbuf_exit(&label); - printbuf_exit(&errbuf); bch_err_fn(c, ret); return ret; - -err_unlock: - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); goto out; err_late: - up_write(&c->state_lock); ca = NULL; goto err; } @@ -2054,13 +2035,11 @@ int bch2_dev_online(struct bch_fs *c, const char *path) unsigned dev_idx; int ret; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); ret = bch2_read_super(path, &opts, &sb); - if (ret) { - up_write(&c->state_lock); + if (ret) return ret; - } dev_idx = sb.sb->dev_idx; @@ -2097,39 +2076,33 @@ int bch2_dev_online(struct bch_fs *c, const 
char *path) goto err; } - mutex_lock(&c->sb_lock); - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = - cpu_to_le64(ktime_get_real_seconds()); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); + bch2_write_super(c); + } - up_write(&c->state_lock); return 0; err: - up_write(&c->state_lock); bch2_free_super(&sb); return ret; } int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); if (!bch2_dev_is_online(ca)) { bch_err(ca, "Already offline"); - up_write(&c->state_lock); return 0; } if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot offline required disk"); - up_write(&c->state_lock); return bch_err_throw(c, device_state_not_allowed); } __bch2_dev_offline(c, ca); - - up_write(&c->state_lock); return 0; } @@ -2147,60 +2120,54 @@ static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { - struct bch_member *m; u64 old_nbuckets; int ret = 0; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); old_nbuckets = ca->mi.nbuckets; if (nbuckets < ca->mi.nbuckets) { bch_err(ca, "Cannot shrink yet"); - ret = -EINVAL; - goto err; + return -EINVAL; } if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { bch_err(ca, "New device size too big (%llu greater than max %u)", nbuckets, BCH_MEMBER_NBUCKETS_MAX); - ret = bch_err_throw(c, device_size_too_big); - goto err; + return bch_err_throw(c, device_size_too_big); } if (bch2_dev_is_online(ca) && get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { bch_err(ca, "New size larger than device"); - ret = bch_err_throw(c, device_size_too_small); - goto err; + return bch_err_throw(c, device_size_too_small); } ret = bch2_dev_buckets_resize(c, 
ca, nbuckets); bch_err_msg(ca, ret, "resizing buckets"); if (ret) - goto err; + return ret; ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); if (ret) - goto err; + return ret; - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - m->nbuckets = cpu_to_le64(nbuckets); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(nbuckets); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bch2_write_super(c); + } if (ca->mi.freespace_initialized) { ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); if (ret) - goto err; + return ret; } bch2_recalc_capacity(c); -err: - up_write(&c->state_lock); - return ret; + return 0; } int bch2_fs_resize_on_mount(struct bch_fs *c) @@ -2218,26 +2185,24 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) if (ret) { enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_fs_resize_on_mount); - up_write(&c->state_lock); return ret; } - mutex_lock(&c->sb_lock); - struct bch_member *m = - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - m->nbuckets = cpu_to_le64(new_nbuckets); - SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(new_nbuckets); + SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); - c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); + bch2_write_super(c); + } if (ca->mi.freespace_initialized) { ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); if (ret) { enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_fs_resize_on_mount); - up_write(&c->state_lock); return ret; } } @@ -2276,6 +2241,10 @@ static struct bch_fs *bdev_get_fs(struct block_device *bdev) return c; } 
+DEFINE_CLASS(bdev_get_fs, struct bch_fs *, + bch2_ro_ref_put(_T), bdev_get_fs(bdev), + struct block_device *bdev); + /* returns with ref on ca->ref */ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev) { @@ -2287,7 +2256,7 @@ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bd static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) { - struct bch_fs *c = bdev_get_fs(bdev); + CLASS(bdev_get_fs, c)(bdev); if (!c) return; @@ -2301,48 +2270,45 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) down_read(&sb->s_umount); } - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); + struct bch_dev *ca = bdev_to_bch_dev(c, bdev); - if (!ca) - goto unlock; + if (ca) { + bool dev = bch2_dev_state_allowed(c, ca, + BCH_MEMBER_STATE_failed, + BCH_FORCE_IF_DEGRADED); + + if (!dev && sb) { + if (!surprise) + sync_filesystem(sb); + shrink_dcache_sb(sb); + evict_inodes(sb); + } - bool dev = bch2_dev_state_allowed(c, ca, - BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED); + CLASS(printbuf, buf)(); + __bch2_log_msg_start(ca->name, &buf); - if (!dev && sb) { - if (!surprise) - sync_filesystem(sb); - shrink_dcache_sb(sb); - evict_inodes(sb); - } + prt_printf(&buf, "offline from block layer"); - struct printbuf buf = PRINTBUF; - __bch2_log_msg_start(ca->name, &buf); + if (dev) { + __bch2_dev_offline(c, ca); + } else { + bch2_journal_flush(&c->journal); + bch2_fs_emergency_read_only2(c, &buf); + } - prt_printf(&buf, "offline from block layer"); + bch2_print_str(c, KERN_ERR, buf.buf); - if (dev) { - __bch2_dev_offline(c, ca); - } else { - bch2_journal_flush(&c->journal); - bch2_fs_emergency_read_only2(c, &buf); + bch2_dev_put(ca); } - bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); - - bch2_dev_put(ca); -unlock: if (sb) up_read(&sb->s_umount); - up_write(&c->state_lock); - bch2_ro_ref_put(c); } static void bch2_fs_bdev_sync(struct block_device *bdev) { - 
struct bch_fs *c = bdev_get_fs(bdev); + CLASS(bdev_get_fs, c)(bdev); if (!c) return; @@ -2353,12 +2319,9 @@ static void bch2_fs_bdev_sync(struct block_device *bdev) * unmounted - we only take this to avoid a warning in * sync_filesystem: */ - down_read(&sb->s_umount); + guard(rwsem_read)(&sb->s_umount); sync_filesystem(sb); - up_read(&sb->s_umount); } - - bch2_ro_ref_put(c); } const struct blk_holder_ops bch2_sb_handle_bdev_ops = { @@ -2380,7 +2343,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, bch_sb_handles sbs = {}; struct bch_fs *c = NULL; struct bch_sb_handle *best = NULL; - struct printbuf errbuf = PRINTBUF; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -2435,15 +2397,12 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, if (ret) goto err; - down_write(&c->state_lock); - darray_for_each(sbs, sb) { - ret = bch2_dev_attach_bdev(c, sb); - if (ret) { - up_write(&c->state_lock); - goto err; + scoped_guard(rwsem_write, &c->state_lock) + darray_for_each(sbs, sb) { + ret = bch2_dev_attach_bdev(c, sb); + if (ret) + goto err; } - } - up_write(&c->state_lock); if (!c->opts.nostart) { ret = bch2_fs_start(c); @@ -2454,7 +2413,6 @@ out: darray_for_each(sbs, sb) bch2_free_super(sb); darray_exit(&sbs); - printbuf_exit(&errbuf); module_put(THIS_MODULE); return c; err_print: @@ -2526,6 +2484,8 @@ static int bch2_param_get_static_key_t(char *buffer, const struct kernel_param * return sprintf(buffer, "%c\n", static_key_enabled(key) ? 
'N' : 'Y'); } +/* this is unused in userspace - silence the warning */ +__maybe_unused static const struct kernel_param_ops bch2_param_ops_static_key_t = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = bch2_param_set_static_key_t, diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 50cf51654265..bd3fa9c3372d 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -18,6 +18,7 @@ #include "btree_key_cache.h" #include "btree_update.h" #include "btree_update_interior.h" +#include "btree_write_buffer.h" #include "btree_gc.h" #include "buckets.h" #include "clock.h" @@ -62,7 +63,7 @@ static ssize_t fn ## _to_text(struct printbuf *, \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ { \ - struct printbuf out = PRINTBUF; \ + CLASS(printbuf, out)(); \ ssize_t ret = fn ## _to_text(&out, kobj, attr); \ \ if (out.pos && out.buf[out.pos - 1] != '\n') \ @@ -75,7 +76,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ memcpy(buf, out.buf, ret); \ } \ - printbuf_exit(&out); \ return bch2_err_class(ret); \ } \ \ @@ -151,6 +151,7 @@ write_attribute(trigger_journal_flush); write_attribute(trigger_journal_writes); write_attribute(trigger_btree_cache_shrink); write_attribute(trigger_btree_key_cache_shrink); +write_attribute(trigger_btree_write_buffer_flush); write_attribute(trigger_btree_updates); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_recalc_capacity); @@ -172,7 +173,9 @@ read_attribute(io_latency_read); read_attribute(io_latency_write); read_attribute(io_latency_stats_read); read_attribute(io_latency_stats_write); +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT read_attribute(congested); +#endif read_attribute(btree_write_stats); @@ -233,14 +236,13 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) size_t ret = 0; struct btree *b; - mutex_lock(&bc->lock); + guard(mutex)(&bc->lock); list_for_each_entry(b, &bc->live[0].list, list) ret += 
btree_buf_bytes(b); list_for_each_entry(b, &bc->live[1].list, list) ret += btree_buf_bytes(b); list_for_each_entry(b, &bc->freeable, list) ret += btree_buf_bytes(b); - mutex_unlock(&bc->lock); return ret; } @@ -539,6 +541,11 @@ STORE(bch2_fs) c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc); } + if (attr == &sysfs_trigger_btree_write_buffer_flush) + bch2_trans_do(c, + (bch2_btree_write_buffer_flush_sync(trans), + bch2_trans_begin(trans))); + if (attr == &sysfs_trigger_gc) bch2_gc_gens(c); @@ -563,9 +570,8 @@ STORE(bch2_fs) closure_wake_up(&c->freelist_wait); if (attr == &sysfs_trigger_recalc_capacity) { - down_read(&c->state_lock); + guard(rwsem_read)(&c->state_lock); bch2_recalc_capacity(c); - up_read(&c->state_lock); } if (attr == &sysfs_trigger_delete_dead_snapshots) @@ -710,6 +716,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_journal_writes, &sysfs_trigger_btree_cache_shrink, &sysfs_trigger_btree_key_cache_shrink, + &sysfs_trigger_btree_write_buffer_flush, &sysfs_trigger_btree_updates, &sysfs_trigger_freelist_wakeup, &sysfs_trigger_recalc_capacity, @@ -942,9 +949,10 @@ SHOW(bch2_dev) if (attr == &sysfs_io_latency_stats_write) bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats); - sysfs_printf(congested, "%u%%", - clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) - * 100 / CONGESTED_MAX); +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT + if (attr == &sysfs_congested) + bch2_dev_congested_to_text(out, ca); +#endif if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); @@ -1015,7 +1023,9 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_write, &sysfs_io_latency_stats_read, &sysfs_io_latency_stats_write, +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT &sysfs_congested, +#endif &sysfs_read_fua_test, diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 782a05fe7656..ea27df30cfcb 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -31,7 +31,7 @@ static void delete_test_keys(struct 
bch_fs *c) static int test_delete(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -66,13 +66,12 @@ static int test_delete(struct bch_fs *c, u64 nr) goto err; err: bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } static int test_delete_written(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -101,7 +100,6 @@ static int test_delete_written(struct bch_fs *c, u64 nr) goto err; err: bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -130,13 +128,14 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(k.k->p.offset != i++); 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -145,12 +144,11 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != --i); 0; - }))); + })); bch_err_msg(c, ret, "error iterating backwards"); if (ret) return ret; @@ -185,14 +183,15 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, 
U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -201,13 +200,12 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); 0; - }))); + })); bch_err_msg(c, ret, "error iterating backwards"); if (ret) return ret; @@ -241,14 +239,15 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(k.k->p.offset != i); i += 2; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -258,10 +257,9 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) pr_info("iterating forwards by slots"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ if (i >= nr * 2) break; @@ -270,7 +268,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i++; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards by slots"); return ret; } @@ -301,15 +299,16 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + 
CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -319,10 +318,9 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards by slots"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ if (i == nr) break; BUG_ON(bkey_deleted(k.k) != !(i % 16)); @@ -331,7 +329,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k->size != 8); i = k.k->p.offset; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards by slots"); return ret; } @@ -344,7 +342,7 @@ static int test_peek_end(struct bch_fs *c, u64 nr) { delete_test_keys(c); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; @@ -358,7 +356,6 @@ static int test_peek_end(struct bch_fs *c, u64 nr) BUG_ON(k.k); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return 0; } @@ -366,7 +363,7 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) { delete_test_keys(c); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; @@ -380,7 +377,6 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return 0; } @@ -392,15 +388,13 @@ static int insert_test_extent(struct bch_fs *c, u64 start, u64 end) { struct bkey_i_cookie k; - int ret; - bkey_cookie_init(&k.k_i); k.k_i.k.p.offset = end; k.k_i.k.p.snapshot = U32_MAX; k.k_i.k.size = end - start; 
k.k_i.k.bversion.lo = test_version++; - ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); + int ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); bch_err_fn(c, ret); return ret; } @@ -446,15 +440,14 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr) static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid) { struct bkey_i_cookie k; - int ret; - bkey_cookie_init(&k.k_i); k.k_i.k.p.inode = inum; k.k_i.k.p.offset = start + len; k.k_i.k.p.snapshot = snapid; k.k_i.k.size = len; - ret = bch2_trans_commit_do(c, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, BTREE_UPDATE_internal_snapshot_node)); bch_err_fn(c, ret); @@ -477,7 +470,6 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum) /* Test skipping over keys in unrelated snapshots: */ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) { - struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_i_cookie cookie; @@ -489,7 +481,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) if (ret) return ret; - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); @@ -497,28 +489,28 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) BUG_ON(k.k->p.snapshot != U32_MAX); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } static int test_snapshots(struct bch_fs *c, u64 nr) { struct bkey_i_cookie cookie; - u32 snapids[2]; - u32 snapid_subvols[2] = { 1, 1 }; - int ret; - bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); + + int 
ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); if (ret) return ret; - ret = bch2_trans_commit_do(c, NULL, NULL, 0, - bch2_snapshot_node_create(trans, U32_MAX, - snapids, - snapid_subvols, - 2)); + u32 snapids[2]; + u32 snapid_subvols[2] = { 1, 1 }; + + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_create(trans, U32_MAX, + snapids, + snapid_subvols, + 2)); if (ret) return ret; @@ -542,42 +534,37 @@ static u64 test_rand(void) static int rand_insert(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct bkey_i_cookie k; - int ret = 0; - u64 i; + CLASS(btree_trans, trans)(c); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { + struct bkey_i_cookie k; bkey_cookie_init(&k.k_i); k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); if (ret) - break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int rand_insert_multi(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_i_cookie k[8]; - int ret = 0; unsigned j; - u64 i; - for (i = 0; i < nr; i += ARRAY_SIZE(k)) { + for (u64 i = 0; i < nr; i += ARRAY_SIZE(k)) { for (j = 0; j < ARRAY_SIZE(k); j++) { bkey_cookie_init(&k[j].k_i); k[j].k.p.offset = test_rand(); k[j].k.p.snapshot = U32_MAX; } - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: @@ -587,25 +574,23 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); if (ret) - 
break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int rand_lookup(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; int ret = 0; - u64 i; bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX)); lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter))); @@ -615,7 +600,6 @@ static int rand_lookup(struct bch_fs *c, u64 nr) } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -646,17 +630,16 @@ static int rand_mixed_trans(struct btree_trans *trans, static int rand_mixed(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie cookie; int ret = 0; - u64 i, rand; bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - for (i = 0; i < nr; i++) { - rand = test_rand(); + for (u64 i = 0; i < nr; i++) { + u64 rand = test_rand(); ret = commit_do(trans, NULL, NULL, 0, rand_mixed_trans(trans, &iter, &cookie, i, rand)); if (ret) @@ -664,7 +647,6 @@ static int rand_mixed(struct bch_fs *c, u64 nr) } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -692,31 +674,27 @@ err: static int rand_delete(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - int ret = 0; - u64 i; + CLASS(btree_trans, trans)(c); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, __do_delete(trans, pos)); if (ret) - break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int seq_insert(struct bch_fs *c, u64 nr) { struct bkey_i_cookie insert; - 
bkey_cookie_init(&insert.k_i); - return bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_slots|BTREE_ITER_intent, k, NULL, NULL, 0, ({ @@ -724,22 +702,22 @@ static int seq_insert(struct bch_fs *c, u64 nr) break; insert.k.p = iter.pos; bch2_trans_update(trans, &iter, &insert.k_i, 0); - }))); + })); } static int seq_lookup(struct bch_fs *c, u64 nr) { - return bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, - 0)); + 0); } static int seq_overwrite(struct bch_fs *c, u64 nr) { - return bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ @@ -747,7 +725,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); bch2_trans_update(trans, &iter, &u.k_i, 0); - }))); + })); } static int seq_delete(struct bch_fs *c, u64 nr) @@ -808,8 +786,8 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, { struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; char name_buf[20]; - struct printbuf nr_buf = PRINTBUF; - struct printbuf per_sec_buf = PRINTBUF; + CLASS(printbuf, nr_buf)(); + CLASS(printbuf, per_sec_buf)(); unsigned i; u64 time; @@ -883,8 +861,6 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, div_u64(time, NSEC_PER_SEC), div_u64(time * nr_threads, nr), per_sec_buf.buf); - printbuf_exit(&per_sec_buf); - printbuf_exit(&nr_buf); return j.ret; } diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 314a24d15d4e..c2eae0ab7765 100644 --- a/fs/bcachefs/thread_with_file.c +++ 
b/fs/bcachefs/thread_with_file.c @@ -60,8 +60,7 @@ int bch2_run_thread_with_file(struct thread_with_file *thr, err: if (fd >= 0) put_unused_fd(fd); - if (thr->task) - kthread_stop(thr->task); + kthread_stop(thr->task); return ret; } @@ -185,23 +184,23 @@ static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubu break; } - spin_lock(&buf->lock); - size_t makeroom = b; - if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) - makeroom = min_t(ssize_t, makeroom, - max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, - 0)); - darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); - - b = min(len, darray_room(buf->buf)); - - if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { - buf->buf.nr += b; - ubuf += b; - len -= b; - copied += b; + scoped_guard(spinlock, &buf->lock) { + size_t makeroom = b; + if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) + makeroom = min_t(ssize_t, makeroom, + max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, + 0)); + darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); + + b = min(len, darray_room(buf->buf)); + + if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { + buf->buf.nr += b; + ubuf += b; + len -= b; + copied += b; + } } - spin_unlock(&buf->lock); if (b) { wake_up(&buf->wait); @@ -349,14 +348,15 @@ int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *ubuf, size_t le if (stdio->done) return -1; - spin_lock(&buf->lock); - int ret = min(len, buf->buf.nr); - buf->buf.nr -= ret; - memcpy(ubuf, buf->buf.data, ret); - memmove(buf->buf.data, - buf->buf.data + ret, - buf->buf.nr); - spin_unlock(&buf->lock); + int ret; + scoped_guard(spinlock, &buf->lock) { + ret = min(len, buf->buf.nr); + buf->buf.nr -= ret; + memcpy(ubuf, buf->buf.data, ret); + memmove(buf->buf.data, + buf->buf.data + ret, + buf->buf.nr); + } wake_up(&buf->wait); return ret; diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c index 
2c34fe4be912..7b5fa44807d7 100644 --- a/fs/bcachefs/time_stats.c +++ b/fs/bcachefs/time_stats.c @@ -138,10 +138,8 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) GFP_ATOMIC); spin_unlock_irqrestore(&stats->lock, flags); } else { - struct time_stat_buffer *b; - - preempt_disable(); - b = this_cpu_ptr(stats->buffer); + guard(preempt)(); + struct time_stat_buffer *b = this_cpu_ptr(stats->buffer); BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); b->entries[b->nr++] = (struct time_stat_buffer_entry) { @@ -151,7 +149,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) if (unlikely(b->nr == ARRAY_SIZE(b->entries))) time_stats_clear_buffer(stats, b); - preempt_enable(); } } diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index b5dae1145afa..3776a1403104 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -292,23 +292,9 @@ DEFINE_EVENT(bio, io_read_promote, TP_ARGS(bio) ); -TRACE_EVENT(io_read_nopromote, - TP_PROTO(struct bch_fs *c, int ret), - TP_ARGS(c, ret), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __array(char, ret, 32 ) - ), - - TP_fast_assign( - __entry->dev = c->dev; - strscpy(__entry->ret, bch2_err_str(ret), sizeof(__entry->ret)); - ), - - TP_printk("%d,%d ret %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ret) +DEFINE_EVENT(fs_str, io_read_nopromote, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) ); DEFINE_EVENT(bio, io_read_bounce, @@ -1330,6 +1316,11 @@ DEFINE_EVENT(fs_str, data_update, TP_ARGS(c, str) ); +DEFINE_EVENT(fs_str, data_update_done_no_rw_devs, + TP_PROTO(struct bch_fs *c, const char *str), + TP_ARGS(c, str) +); + DEFINE_EVENT(fs_str, io_move_pred, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 7a4436fd4441..2ded7f3c835f 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -321,11 +321,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) 
int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) { - bch_stacktrace stack = { 0 }; + CLASS(bch_stacktrace, stack)(); int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); bch2_prt_backtrace(out, &stack); - darray_exit(&stack); return ret; } @@ -982,9 +981,8 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) int cpu; /* access to pcpu vars has to be blocked by other locking */ - preempt_disable(); - ret = this_cpu_ptr(p); - preempt_enable(); + scoped_guard(preempt) + ret = this_cpu_ptr(p); for_each_possible_cpu(cpu) { u64 *i = per_cpu_ptr(p, cpu); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6488f098d140..52ac8230be9f 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -216,7 +216,8 @@ void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *, const char *); -typedef DARRAY(unsigned long) bch_stacktrace; +DEFINE_DARRAY_NAMED(bch_stacktrace, unsigned long); + int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t); @@ -732,6 +733,13 @@ static inline bool test_bit_le64(size_t bit, __le64 *addr) return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0; } +static inline bool __test_and_set_bit_le64(size_t bit, __le64 *addr) +{ + bool ret = test_bit_le64(bit, addr); + __set_bit_le64(bit, addr); + return ret; +} + static inline void memcpy_swab(void *_dst, void *_src, size_t len) { u8 *dst = _dst + len; diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 627f153798c6..903e20cd34fa 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -313,8 +313,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; u64 offset = 0, inum = inode->ei_inode.bi_inum; - int ret 
= bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, POS(inum, offset), POS(inum, U64_MAX), inode->ei_inum.subvol, 0, k, ({ @@ -322,7 +322,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) continue; bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); - }))) ?: + })) ?: bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false) ?: bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); @@ -335,9 +335,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = bch2_trans_do(c, - bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); + CLASS(btree_trans, trans)(c); + int ret = lockrestart_do(trans, + bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); if (ret < 0 && bch2_err_matches(ret, ENOENT)) ret = -ENODATA; @@ -356,12 +357,12 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_unpacked inode_u; int ret; - ret = bch2_trans_run(c, - commit_do(trans, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, 0, bch2_xattr_set(trans, inode_inum(inode), &inode_u, &hash, name, value, size, handler->flags, flags)) ?: - (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0)); + (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0); return bch2_err_class(ret); } @@ -418,7 +419,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, bch2_inode_opts_to_opts(&inode->ei_inode); const struct bch_option *opt; int id, inode_opt_id; - struct printbuf out = PRINTBUF; int ret; u64 v; @@ -439,6 +439,7 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, !(inode->ei_inode.bi_fields_set & (1 << 
inode_opt_id))) return -ENODATA; + CLASS(printbuf, out)(); v = bch2_opt_get_by_id(&opts, id); bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0); @@ -453,7 +454,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, memcpy(buffer, out.buf, out.pos); } - printbuf_exit(&out); return ret; } @@ -532,11 +532,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, kfree(buf); if (ret < 0) - goto err_class_exit; + goto err; ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v); if (ret < 0) - goto err_class_exit; + goto err; s.v = v + 1; s.defined = true; @@ -548,7 +548,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, * rename() also has to deal with keeping inherited options up * to date - see bch2_reinherit_attrs() */ - spin_lock(&dentry->d_lock); + guard(spinlock)(&dentry->d_lock); if (!IS_ROOT(dentry)) { struct bch_inode_info *dir = to_bch_ei(d_inode(dentry->d_parent)); @@ -557,26 +557,24 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, } else { s.v = 0; } - spin_unlock(&dentry->d_lock); s.defined = false; } - mutex_lock(&inode->ei_update_lock); - if (inode_opt_id == Inode_opt_project) { - /* - * inode fields accessible via the xattr interface are stored - * with a +1 bias, so that 0 means unset: - */ - ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); - if (ret) - goto err; - } + scoped_guard(mutex, &inode->ei_update_lock) { + if (inode_opt_id == Inode_opt_project) { + /* + * inode fields accessible via the xattr interface are stored + * with a +1 bias, so that 0 means unset: + */ + ret = bch2_set_projid(c, inode, s.v ? 
s.v - 1 : 0); + if (ret) + goto err; + } - ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + } err: - mutex_unlock(&inode->ei_update_lock); -err_class_exit: return bch2_err_class(ret); } diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 9de356bcb411..aa176cc9a324 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -83,6 +83,8 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + /* Set after we add a new block group to the free space tree. */ + BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c7cc24a5dd5e..8c597fa60523 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) { - WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root)); + struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root); + + if (WARN_ON(node)) + refcount_dec(&node->refs); } static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1beb9458f622..0d6ad7512f21 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root) if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); + if (WARN_ON(!xa_empty(&root->delayed_nodes))) + xa_destroy(&root->delayed_nodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); @@ -2156,8 +2158,7 @@ static int 
load_global_roots_objectid(struct btrfs_root *tree_root, found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) - ret = PTR_ERR(root); + ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) * * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one - * can either create new ordered extents nor create delayed iputs - * through some other means. + * can create new ordered extents, but delayed iputs can still be added + * by a reclaim worker (see comments further below). * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, @@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); + /* + * Run delayed iputs in case an async reclaim worker is waiting for them + * to be run as mentioned above. + */ btrfs_run_delayed_iputs(fs_info); - /* There should be no more workload to generate new delayed iputs. */ - set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); cancel_work_sync(&fs_info->em_shrinker_work); + /* + * Run delayed iputs again because an async reclaim worker may have + * added new ones if it was flushing delalloc: + * + * shrink_delalloc() -> btrfs_start_delalloc_roots() -> + * start_delalloc_inodes() -> btrfs_add_delayed_iput() + */ + btrfs_run_delayed_iputs(fs_info); + + /* There should be no more workload to generate new delayed iputs. 
*/ + set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); + /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 849199768664..1dc931c4937f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) spin_unlock(&eb->refs_lock); continue; } - xa_unlock_irq(&fs_info->buffer_tree); /* * If tree ref isn't set then we know the ref on this eb is a @@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ + xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); xa_lock_irq(&fs_info->buffer_tree); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 0c573d46639a..a83c268f7f87 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; - ASSERT(ret == 0); + /* + * If ret is 1 (no key found), it means this is an empty block group, + * without any extents allocated from it and there's no block group + * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree + * because we are using the block group tree feature, so block group + * items are stored in the block group tree. It also means there are no + * extents allocated for block groups with a start offset beyond this + * block group's end offset (this is the last, highest, block group). 
+ */ + if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) + ASSERT(ret == 0); start = block_group->start; end = block_group->start + block_group->length; - while (1) { + while (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.type == BTRFS_EXTENT_ITEM_KEY || @@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) goto out_locked; - if (ret) - break; } if (start < end) { ret = __add_to_free_space_tree(trans, block_group, path2, @@ -1233,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, { BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; + struct rb_node *node; int nr; int ret; @@ -1261,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, btrfs_release_path(path); } + node = rb_first_cached(&trans->fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *bg; + + bg = rb_entry(node, struct btrfs_block_group, cache_node); + clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); + node = rb_next(node); + cond_resched(); + } + return 0; } @@ -1350,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) block_group = rb_entry(node, struct btrfs_block_group, cache_node); + + if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, + &block_group->runtime_flags)) + goto next; + ret = populate_free_space_tree(trans, block_group); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } +next: if (btrfs_should_end_transaction(trans)) { btrfs_end_transaction(trans); trans = btrfs_start_transaction(free_space_root, 1); @@ -1382,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); + /* + * While rebuilding the free space tree we may allocate new metadata + * block groups while modifying the free space 
tree. + * + * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we + * can use multiple transactions, every time btrfs_end_transaction() is + * called at btrfs_rebuild_free_space_tree() we finish the creation of + * new block groups by calling btrfs_create_pending_block_groups(), and + * that in turn calls us, through add_block_group_free_space(), to add + * a free space info item and a free space extent item for the block + * group. + * + * Then later btrfs_rebuild_free_space_tree() may find such new block + * groups and processes them with populate_free_space_tree(), which can + * fail with EEXIST since there are already items for the block group in + * the free space tree. Notice that we say "may find" because a new + * block group may be added to the block groups rbtree in a node before + * or after the block group currently being processed by the rebuild + * process. So signal the rebuild process to skip such new block groups + * if it finds them. + */ + set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + ret = add_new_free_space_info(trans, block_group, path); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0c778243bf1..fc66872b4c74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { - btrfs_info(fs_info, - "failed to delete reference to %.*s, inode %llu parent %llu", - name->len, name->name, ino, dir_ino); + btrfs_crit(fs_info, + "failed to delete reference to %.*s, root %llu inode %llu parent %llu", + name->len, name->name, btrfs_root_id(root), ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; 
struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value of the deleted dir to its + * parent directory. This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; @@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); - if (!ret) { + if (!ret) btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. 
This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; - } out: btrfs_end_transaction(trans); out_notrans: @@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + bool logs_pinned = false; struct fscrypt_name old_fname, new_fname; struct fscrypt_str *old_name, *new_name; @@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && + new_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not for + * root entries) pin the log early to prevent any concurrent + * task from logging the directory after we removed the old + * entries and before we add the new entries, otherwise that + * task can sync a log without any entry for the inodes we are + * renaming and therefore replaying that log, if a power failure + * happens after syncing the log, would result in deleting the + * inodes. + * + * If the rename affects two different directories, we want to + * make sure the that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). 
+ */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir, BTRFS_I(new_inode)->dir_index = new_idx; /* - * Now pin the logs of the roots. We do it to ensure that no other task - * can sync the logs while we are in progress with the rename, because - * that could result in an inconsistency in case any of the inodes that - * are part of this rename operation were logged before. + * Do the log updates for all inodes. + * + * If either entry is for a root we don't need to update the logs since + * we've called btrfs_set_log_full_commit() before. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(dest); - - /* Do the log updates for all inodes. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) { btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), old_rename_ctx.index, new_dentry->d_parent); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), new_rename_ctx.index, old_dentry->d_parent); + } - /* Now unpin the logs. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +out_fail: + if (logs_pinned) { btrfs_end_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_end_log_trans(dest); -out_fail: + } ret2 = btrfs_end_transaction(trans); ret = ret ? 
ret : ret2; out_notrans: @@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); struct fscrypt_name old_fname, new_fname; + bool logs_pinned = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not a + * root entry) pin the log to prevent any concurrent task from + * logging the directory after we removed the old entry and + * before we add the new entry, otherwise that task can sync + * a log without any entry for the inode we are renaming and + * therefore replaying that log, if a power failure happens + * after syncing the log, would result in deleting the inode. + * + * If the rename affects two different directories, we want to + * make sure the that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). 
+ */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (old_inode->i_nlink == 1) BTRFS_I(old_inode)->dir_index = index; - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), rename_ctx.index, new_dentry->d_parent); @@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap, } } out_fail: + if (logs_pinned) { + btrfs_end_log_trans(root); + btrfs_end_log_trans(dest); + } ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 913acef3f0a9..8a60983a697c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_record_new_subvolume(trans, BTRFS_I(dir)); - d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { - btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet"); + btrfs_err(fs_info, "scrub: extent tree v2 not yet supported"); return -EINVAL; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ce36fafc771e..7cd5e76a783c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u 
(path: %s)", +"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, err: btrfs_warn_in_rcu(fs_info, - "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", + "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * /* Super block error, no need to search extent tree. */ if (is_super) { - btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", + btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } @@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * &ref_level); if (ret < 0) { btrfs_warn(fs_info, - "failed to resolve tree backref for logical %llu: %d", - swarn.logical, ret); + "scrub: failed to resolve tree backref for logical %llu: %d", + swarn.logical, ret); break; } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", +"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? 
"node" : "leaf"), ref_level, ref_root); @@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad bytenr, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_bytenr(header), logical); return; @@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad fsid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, header->fsid, fs_info->fs_devices->fsid); return; @@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", logical, stripe->mirror_num, header->chunk_tree_uuid, fs_info->chunk_tree_uuid); return; @@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, +"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, logical, stripe->mirror_num, CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), CSUM_FMT_VALUE(fs_info->csum_size, 
calculated_csum)); @@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad generation, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_generation(header), stripe->sectors[sector_nr].generation); @@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) */ if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { btrfs_warn_rl(fs_info, - "tree block at %llu crosses stripe boundary %llu", + "scrub: tree block at %llu crosses stripe boundary %llu", stripe->logical + (sector_nr << fs_info->sectorsize_bits), stripe->logical); @@ -1046,12 +1046,12 @@ skip: if (repaired) { if (dev) { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on dev %s physical %llu", + "scrub: fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on mirror %u", + "scrub: fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } continue; @@ -1060,12 +1060,12 @@ skip: /* The remaining are all for unrepaired. 
*/ if (dev) { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on dev %s physical %llu", +"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on mirror %u", + "scrub: unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, physical, sctx->write_pointer); if (ret) - btrfs_err(fs_info, - "zoned: failed to recover write pointer"); + btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer"); } mutex_unlock(&sctx->wr_lock); btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); @@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, int ret; if (unlikely(!extent_root || !csum_root)) { - btrfs_err(fs_info, "no valid extent or csum root for scrub"); + btrfs_err(fs_info, "scrub: no valid extent or csum root found"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * @@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe) struct btrfs_fs_info *fs_info = stripe->bg->fs_info; btrfs_err(fs_info, - "stripe %llu has unrepaired metadata sector at %llu", + "scrub: stripe %llu has unrepaired metadata sector at logical %llu", stripe->logical, stripe->logical + (i << fs_info->sectorsize_bits)); return true; @@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, bitmap_and(&error, &error, &has_extent, stripe->nr_sectors); if (!bitmap_empty(&error, stripe->nr_sectors)) { btrfs_err(fs_info, -"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", +"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", full_stripe_start, i, stripe->nr_sectors, &error); ret 
= -EIO; @@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ro_set = 0; } else if (ret == -ETXTBSY) { btrfs_warn(fs_info, - "skipping scrub of block group %llu due to active swapfile", + "scrub: skipping scrub of block group %llu due to active swapfile", cache->start); scrub_pause_off(fs_info); ret = 0; goto skip_unfreeze; } else { - btrfs_warn(fs_info, - "failed setting block group ro: %d", ret); + btrfs_warn(fs_info, "scrub: failed setting block group ro: %d", + ret); btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); scrub_pause_off(fs_info); @@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, ret = btrfs_check_super_csum(fs_info, sb); if (ret != 0) { btrfs_err_rl(fs_info, - "super block at physical %llu devid %llu has bad csum", + "scrub: super block at physical %llu devid %llu has bad csum", physical, dev->devid); return -EIO; } if (btrfs_super_generation(sb) != generation) { btrfs_err_rl(fs_info, -"super block at physical %llu devid %llu has bad generation %llu expect %llu", +"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu", physical, dev->devid, btrfs_super_generation(sb), generation); return -EUCLEAN; @@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_err_in_rcu(fs_info, - "scrub on devid %llu: filesystem on %s is not writable", + "scrub: devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 97e933113b82..cea8a7e9d6d3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r unsigned int nofs_flag; struct btrfs_inode *inode; + /* Only meant to be called for subvolume roots 
and not for log roots. */ + ASSERT(is_fstree(btrfs_root_id(root))); + /* * We're holding a transaction handle whether we are logging or * replaying a log tree, so we must make sure NOFS semantics apply @@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct btrfs_inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - return NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. @@ -668,15 +656,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = ALIGN(start + size, fs_info->sectorsize); } else { - ret = 0; - goto out; + btrfs_err(fs_info, + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", + found_type, btrfs_root_id(root), key->objectid, key->offset); + return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) { - ret = -EIO; - goto out; - } + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the @@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -961,7 +950,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = unlink_inode_for_log_replay(trans, dir, inode, &name); out: kfree(name.name); - iput(&inode->vfs_inode); + if (inode) + 
iput(&inode->vfs_inode); return ret; } @@ -1072,7 +1062,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1145,13 +1137,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; @@ -1166,18 +1158,18 @@ again: kfree(victim_name.name); return ret; } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + ret = PTR_ERR(victim_parent); + } else { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); ret = unlink_inode_for_log_replay(trans, victim_parent, inode, &victim_name); + iput(&victim_parent->vfs_inode); } - iput(&victim_parent->vfs_inode); kfree(victim_name.name); if (ret) return ret; @@ -1314,9 +1306,9 @@ again: struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } @@ -1388,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. 
The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1408,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * parent object can change from one array * item to another. */ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); @@ -1681,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } @@ -1719,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *inode; struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; @@ -1760,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { + dir = 
btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { iput(&inode->vfs_inode); - return -EIO; + return PTR_ERR(dir); } ret = btrfs_add_link(trans, dir, inode, name, 1, index); @@ -1844,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -2146,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -2300,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. 
*/ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2466,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } from = ALIGN(i_size_read(&inode->vfs_inode), @@ -7447,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. * Also we don't need to worry with renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 89835071cfea..f475b4b7c457 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5b0156d5b95..9430b34d3cbb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg, static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1426,6 +1427,13 @@ static 
int btrfs_load_block_group_dup(struct btrfs_block_group *bg, zone_info[1].physical); return -EIO; } + + if (zone_info[0].alloc_offset == WP_CONVENTIONAL) + zone_info[0].alloc_offset = last_alloc; + + if (zone_info[1].alloc_offset == WP_CONVENTIONAL) + zone_info[1].alloc_offset = last_alloc; + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(bg->fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); @@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; int i; @@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); for (i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + zone_info[i].alloc_offset = last_alloc; + if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && !btrfs_test_opt(fs_info, DEGRADED)) { btrfs_err(fs_info, @@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == 
WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, map->num_stripes); + div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > i) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == i) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; @@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (test_bit(0, active) != test_bit(i, active)) { @@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, + map->num_stripes / map->sub_stripes); + div_u64_rem(stripe_nr, + (map->num_stripes / map->sub_stripes), + &stripe_index); + + zone_info[i].alloc_offset = + 
full_stripe_nr * map->stripe_size; + + if (stripe_index > (i / map->sub_stripes)) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == (i / map->sub_stripes)) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if ((i % map->sub_stripes) == 0) { bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; @@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: - ret = btrfs_load_block_group_dup(cache, map, zone_info, active); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active, + last_alloc); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID1C3: case BTRFS_BLOCK_GROUP_RAID1C4: - ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid1(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID0: - ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid0(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID10: - ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid10(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 6a329c329f43..16e4a6bd9b97 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -214,9 +214,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* * bit 30: I/O error occurred on this folio + * bit 29: CPU has dirty data in D-cache (needs aliasing handling); * bit 0 - 29: remaining parts to complete this folio */ -#define EROFS_ONLINEFOLIO_EIO (1 << 30) +#define EROFS_ONLINEFOLIO_EIO 30 +#define EROFS_ONLINEFOLIO_DIRTY 29 void erofs_onlinefolio_init(struct folio *folio) { @@ -233,19 
+235,23 @@ void erofs_onlinefolio_split(struct folio *folio) atomic_inc((atomic_t *)&folio->private); } -void erofs_onlinefolio_end(struct folio *folio, int err) +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) { int orig, v; do { orig = atomic_read((atomic_t *)&folio->private); - v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + DBG_BUGON(orig <= 0); + v = dirty << EROFS_ONLINEFOLIO_DIRTY; + v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); - if (v & ~EROFS_ONLINEFOLIO_EIO) + if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1)) return; folio->private = 0; - folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); + if (v & BIT(EROFS_ONLINEFOLIO_DIRTY)) + flush_dcache_folio(folio); + folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -351,11 +357,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + trace_erofs_read_folio(folio, true); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) { + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + return iomap_readahead(rac, &erofs_iomap_ops); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf62e2836b60..358061d7b660 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, cur = min(cur, rq->outputsize); if (cur && rq->out[0]) { kin = kmap_local_page(rq->in[nrpages_in - 1]); - if (rq->out[0] == rq->in[nrpages_in - 1]) { + if (rq->out[0] == rq->in[nrpages_in - 1]) memmove(kin + rq->pageofs_out, kin + pi, cur); - flush_dcache_page(rq->out[0]); - } else { + else memcpy_to_page(rq->out[0], rq->pageofs_out, kin + pi, cur); - 
} kunmap_local(kin); } rq->outputsize -= cur; @@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK; DBG_BUGON(no >= nrpages_out); cnt = min(insz - pi, PAGE_SIZE - po); - if (rq->out[no] == rq->in[ni]) { + if (rq->out[no] == rq->in[ni]) memmove(kin + po, kin + rq->pageofs_in + pi, cnt); - flush_dcache_page(rq->out[no]); - } else if (rq->out[no]) { + else if (rq->out[no]) memcpy_to_page(rq->out[no], po, kin + rq->pageofs_in + pi, cnt); - } pi += cnt; } while (pi < insz); kunmap_local(kin); diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2fae209d0274..3e4b38bec0aa 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -58,6 +58,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", @@ -88,6 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; ctx->pos = dbstart + maxsize; ofs = 0; + cond_resched(); } erofs_put_metabuf(&buf); if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 7d81f504bff0..91781718199e 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) } else { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret); + erofs_onlinefolio_end(fi.folio, ret, false); } } bio_uninit(&rq->bio); @@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { + const struct cred *old_cred; struct iov_iter iter; int ret; @@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq 
*rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); + old_cred = override_creds(rq->iocb.ki_filp->f_cred); ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); + revert_creds(old_cred); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } @@ -93,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) struct erofs_map_blocks *map = &io->map; unsigned int cur = 0, end = folio_size(folio), len, attached = 0; loff_t pos = folio_pos(folio), ofs; - struct iov_iter iter; - struct bio_vec bv; int err = 0; erofs_onlinefolio_init(folio); @@ -119,13 +120,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) err = PTR_ERR(src); break; } - bvec_set_folio(&bv, folio, len, cur); - iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len); - if (copy_to_iter(src, len, &iter) != len) { - erofs_put_metabuf(&buf); - err = -EIO; - break; - } + memcpy_to_folio(folio, cur, src, len); erofs_put_metabuf(&buf); } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, cur + len); @@ -159,7 +154,7 @@ io_retry: } cur += len; } - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a32c03a80c70..06b867d2fc3b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -315,10 +315,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ -#define EROFS_MAP_FRAGMENT 0x0010 +#define __EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 +#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT) + struct erofs_map_blocks { struct erofs_buf buf; @@ -390,7 +392,7 @@ int erofs_fiemap(struct inode *inode, struct 
fiemap_extent_info *fieinfo, int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); -void erofs_onlinefolio_end(struct folio *folio, int err); +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index fe8071844724..e3f28a1bb945 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1034,7 +1034,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, end); tight = false; - } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + } else if (map->m_flags & __EROFS_MAP_FRAGMENT) { erofs_off_t fpos = offset + cur - map->m_la; err = z_erofs_read_fragment(inode->i_sb, folio, cur, @@ -1091,7 +1091,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, tight = (bs == PAGE_SIZE); } } while ((end = cur) > 0); - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -1196,7 +1196,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) cur += len; } kunmap_local(dst); - erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); + erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true); list_del(p); kfree(bvi); } @@ -1355,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { - erofs_onlinefolio_end(page_folio(page), err); + erofs_onlinefolio_end(page_folio(page), err, true); continue; } if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14ea47f954f5..f1a15ff22147 100644 --- a/fs/erofs/zmap.c +++ 
b/fs/erofs/zmap.c @@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; @@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { - map->m_flags |= EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -597,6 +596,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (la > map->m_la) { r = mid; + if (la > lend) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } lend = la; } else { l = mid + 1; @@ -613,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; @@ -635,12 +638,6 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } } map->m_llen = lend - map->m_la; - if (!last && map->m_llen < sb->s_blocksize) { - erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", - map->m_llen, map->m_la, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } return 0; } @@ -799,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? 
IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d4dbffdedd08..0fbf5dfedb24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -883,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return ep_refcount_dec_and_test(ep); + return true; } /* @@ -891,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - WARN_ON_ONCE(__ep_remove(ep, epi, false)); + if (__ep_remove(ep, epi, false)) + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; - bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) @@ -931,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep) cond_resched(); } - dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - - if (dispose) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); } @@ -1137,7 +1135,7 @@ again: dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); - if (dispose) + if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } diff --git a/fs/exec.c b/fs/exec.c index 1f5fdd2e096e..ba400aafd640 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt) bool path_noexec(const struct path *path) { + /* If it's an anonymous inode make sure that we catch any shenanigans. 
*/ + VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC)); return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } @@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) return file; + if (path_noexec(&file->f_path)) + return ERR_PTR(-EACCES); + /* * In the past the regular type check was here. It moved to may_open() in * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is * an invariant that all non-regular files error out before we get here. */ - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || - path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) return ERR_PTR(-EACCES); err = exe_file_deny_write_access(file); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6bd3de64f2a8..696131e655ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,6 +35,17 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_pagecache(inode, old_size); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); + filemap_invalidate_unlock(inode->i_mapping); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); + folio_lock(folio); if (unlikely(folio->mapping != inode->i_mapping || folio_pos(folio) > i_size_read(inode) || @@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 
f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + filemap_invalidate_lock(mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -4860,6 
+4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); + filemap_invalidate_unlock(inode->i_mapping); return count; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1cb4cba7f961..bfe104db284e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2078,7 +2078,6 @@ write_node: if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { - folio_unlock(folio); folio_batch_release(&fbatch); ret = -EIO; goto out; diff --git a/fs/file.c b/fs/file.c index 3a3146664cf3..b6db031545e6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1198,8 +1198,12 @@ bool file_seek_cur_needs_f_lock(struct file *file) if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) return false; - VFS_WARN_ON_ONCE((file_count(file) > 1) && - !mutex_is_locked(&file->f_pos_lock)); + /* + * Note that we are not guaranteed to be called after fdget_pos() on + * this file obj, in which case the caller is expected to provide the + * appropriate locking. 
+ */ + return true; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f102afc03359..47006d0753f1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, struct address_space *mapping, struct iov_iter *ii, loff_t pos, - unsigned int max_pages) + unsigned int max_folios) { struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); @@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, int err = 0; num = min(iov_iter_count(ii), fc->max_write); - num = min(num, max_pages << PAGE_SHIFT); ap->args.in_pages = true; ap->descs[0].offset = offset; - while (num) { + while (num && ap->num_folios < max_folios) { size_t tmp; struct folio *folio; pgoff_t index = pos >> PAGE_SHIFT; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bfe8d8af46f3..9572bdef49ee 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -9,6 +9,7 @@ #include "fuse_i.h" #include "dev_uring_i.h" +#include <linux/dax.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> @@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode) /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); + if (FUSE_IS_DAX(inode)) + dax_break_layout_final(inode); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a8..6f487fc6be34 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s) */ inode->i_state = I_DIRTY; /* - * Historically anonymous inodes didn't have a type at all and - * userspace has come to rely on this. Internally they're just - * regular files but S_IFREG is masked off when reporting - * information to userspace. 
+ * Historically anonymous inodes don't have a type at all and + * userspace has come to rely on this. */ - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE | S_ANON_INODE; diff --git a/fs/namei.c b/fs/namei.c index 4bb889fc980b..c26a7ee42184 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, * @base: base directory to lookup from * * Look up a dentry by name in the dcache, returning NULL if it does not - * currently exist. The function does not try to create a dentry. + * currently exist. The function does not try to create a dentry and if one + * is found it doesn't try to revalidate it. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. @@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) if (err) return ERR_PTR(err); - return lookup_dcache(name, base, 0); + return d_lookup(base, name); } EXPORT_SYMBOL(try_lookup_noperm); @@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. * - * Unlike lookup_noperm, it should be called without the parent + * Unlike lookup_noperm(), it should be called without the parent * i_rwsem held, and will take the i_rwsem itself if necessary. + * + * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already + * existed. 
*/ struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { struct dentry *ret; + int err; - ret = try_lookup_noperm(name, base); + err = lookup_noperm_common(name, base); + if (err) + return ERR_PTR(err); + + ret = lookup_dcache(name, base, 0); if (!ret) ret = lookup_slow(name, base, 0); return ret; @@ -3471,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, return -EACCES; break; default: - VFS_BUG_ON_INODE(1, inode); + VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); diff --git a/fs/namespace.c b/fs/namespace.c index e13d9ab4f564..54c59e091919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2310,21 +2310,62 @@ out: return dst_mnt; } -/* Caller should check returned pointer for errors */ +static inline bool extend_array(struct path **res, struct path **to_free, + unsigned n, unsigned *count, unsigned new_count) +{ + struct path *p; -struct vfsmount *collect_mounts(const struct path *path) + if (likely(n < *count)) + return true; + p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL); + if (p && *count) + memcpy(p, *res, *count * sizeof(struct path)); + *count = new_count; + kfree(*to_free); + *to_free = *res = p; + return p; +} + +struct path *collect_paths(const struct path *path, + struct path *prealloc, unsigned count) { - struct mount *tree; - namespace_lock(); - if (!check_mnt(real_mount(path->mnt))) - tree = ERR_PTR(-EINVAL); - else - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); - namespace_unlock(); - if (IS_ERR(tree)) - return ERR_CAST(tree); - return &tree->mnt; + struct mount *root = real_mount(path->mnt); + struct mount *child; + struct path *res = prealloc, *to_free = NULL; + unsigned n = 0; + + guard(rwsem_read)(&namespace_sem); + + if (!check_mnt(root)) + return ERR_PTR(-EINVAL); + if (!extend_array(&res, &to_free, 0, &count, 32)) + return ERR_PTR(-ENOMEM); + res[n++] = *path; + 
list_for_each_entry(child, &root->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, path->dentry)) + continue; + for (struct mount *m = child; m; m = next_mnt(m, child)) { + if (!extend_array(&res, &to_free, n, &count, 2 * count)) + return ERR_PTR(-ENOMEM); + res[n].mnt = &m->mnt; + res[n].dentry = m->mnt.mnt_root; + n++; + } + } + if (!extend_array(&res, &to_free, n, &count, count + 1)) + return ERR_PTR(-ENOMEM); + memset(res + n, 0, (count - n) * sizeof(struct path)); + for (struct path *p = res; p->mnt; p++) + path_get(p); + return res; +} + +void drop_collected_paths(struct path *paths, struct path *prealloc) +{ + for (struct path *p = paths; p->mnt; p++) + path_put(p); + if (paths != prealloc) + kfree(paths); } static void free_mnt_ns(struct mnt_namespace *); @@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt) free_mnt_ns(ns); } -void drop_collected_mounts(struct vfsmount *mnt) -{ - namespace_lock(); - lock_mount_hash(); - umount_tree(real_mount(mnt), 0); - unlock_mount_hash(); - namespace_unlock(); -} - static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path) } EXPORT_SYMBOL_GPL(clone_private_mount); -int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, - struct vfsmount *root) -{ - struct mount *mnt; - int res = f(root, arg); - if (res) - return res; - list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { - res = f(&mnt->mnt, arg); - if (res) - return res; - } - return 0; -} - static void lock_mnt_tree(struct mount *mnt) { struct mount *p; @@ -2751,14 +2768,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt(&child->mnt_parent->mnt, - child->mnt_mountpoint); - if (q) - mnt_change_mountpoint(child, smp, q); /* Notice when we are propagating 
across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); commit_tree(child); } put_mountpoint(smp); @@ -5290,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, kattr.kflags |= MOUNT_KATTR_RECURSE; ret = wants_mount_setattr(uattr, usize, &kattr); - if (ret < 0) - return ret; - - if (ret) { + if (ret > 0) { ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; - finish_mount_kattr(&kattr); } + if (ret) + return ret; } fd = get_unused_fd_flags(flags & O_CLOEXEC); @@ -6262,7 +6275,11 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; - drop_collected_mounts(&ns->root->mnt); + namespace_lock(); + lock_mount_hash(); + umount_tree(ns->root, 0); + unlock_mount_hash(); + namespace_unlock(); free_mnt_ns(ns); } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 72a3e6db2524..f27ea5099a68 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, * data written into the pagecache until we can find out from the server what * the values actually are. 
*/ -static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, - loff_t i_size, loff_t pos, size_t copied) +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied) { + loff_t i_size, end = pos + copied; blkcnt_t add; size_t gap; + if (end <= i_size_read(inode)) + return; + if (ctx->ops->update_i_size) { - ctx->ops->update_i_size(inode, pos); + ctx->ops->update_i_size(inode, end); return; } - i_size_write(inode, pos); + spin_lock(&inode->i_lock); + + i_size = i_size_read(inode); + if (end > i_size) { + i_size_write(inode, end); #if IS_ENABLED(CONFIG_FSCACHE) - fscache_update_cookie(ctx->cache, NULL, &pos); + fscache_update_cookie(ctx->cache, NULL, &end); #endif - gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); - if (copied > gap) { - add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); + if (copied > gap) { + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); - inode->i_blocks = min_t(blkcnt_t, - DIV_ROUND_UP(pos, SECTOR_SIZE), - inode->i_blocks + add); + inode->i_blocks = min_t(blkcnt_t, + DIV_ROUND_UP(end, SECTOR_SIZE), + inode->i_blocks + add); + } } + spin_unlock(&inode->i_lock); } /** @@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? 
BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; - loff_t i_size, pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; @@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ + netfs_update_i_size(ctx, inode, pos, copied); pos += copied; - i_size = i_size_read(inode); - if (pos > i_size) - netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index fa9a5bf3c6d5..a16660ab7f83 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,20 +9,6 @@ #include <linux/uio.h> #include "internal.h" -static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) -{ - struct inode *inode = wreq->inode; - unsigned long long end = wreq->start + wreq->transferred; - - if (!wreq->error && - i_size_read(inode) < end) { - if (wreq->netfs_ops->update_i_size) - wreq->netfs_ops->update_i_size(inode, end); - else - i_size_write(inode, end); - } -} - /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. 
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (async) wreq->iocb = iocb; wreq->len = iov_iter_count(&wreq->buffer.iter); - wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { _debug("begin = %zd", ret); @@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } if (!async) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); ret = netfs_wait_for_write(wreq); if (ret > 0) iocb->ki_pos += ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index e2ee9183392b..d4f16fefd965 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); /* + * buffered_write.c + */ +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied); + +/* * main.c */ extern unsigned int netfs_debug; @@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, enum netfs_rreq_trace trace) { if (test_bit(rreq_flag, &rreq->flags)) { - trace_netfs_rreq(rreq, trace); clear_bit_unlock(rreq_flag, &rreq->flags); smp_mb__after_atomic(); /* Set flag before task state */ + trace_netfs_rreq(rreq, trace); wake_up(&rreq->waitq); } } /* + * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq) +{ + /* Order read of flags before read of anything else, such as error. */ + return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags); +} + +/* + * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq) +{ + /* Order read of flags before read of anything else, such as error. 
*/ + return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags); +} + +/* * fscache-cache.c */ #ifdef CONFIG_PROC_FS diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 3db401d269e7..73da6c9f5777 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) if (v == &netfs_io_requests) { seq_puts(m, - "REQUEST OR REF FL ERR OPS COVERAGE\n" - "======== == === == ==== === =========\n" + "REQUEST OR REF FLAG ERR OPS COVERAGE\n" + "======== == === ==== ==== === =========\n" ); return 0; } rreq = list_entry(v, struct netfs_io_request, proc_link); seq_printf(m, - "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", + "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx", rreq->debug_id, netfs_origins[rreq->origin], refcount_read(&rreq->ref), diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 43b67a28a8fa..20748bcfbf59 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, DEFINE_WAIT(myself); list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) continue; - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); for (;;) { prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) break; trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } } + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); finish_wait(&rreq->waitq, &myself); } @@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, static int netfs_collect_in_app(struct netfs_io_request *rreq, bool (*collector)(struct netfs_io_request *rreq)) { - bool need_collect = false, inactive = 
true; + bool need_collect = false, inactive = true, done = true; + + if (!netfs_check_rreq_in_progress(rreq)) { + trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); + return 1; /* Done */ + } for (int i = 0; i < NR_IO_STREAMS; i++) { struct netfs_io_subrequest *subreq; @@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, struct netfs_io_subrequest, rreq_link); if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + (!netfs_check_subreq_in_progress(subreq) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { need_collect = true; break; } + if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) + done = false; } - if (!need_collect && !inactive) + if (!need_collect && !inactive && !done) return 0; /* Sleep */ __set_current_state(TASK_RUNNING); @@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, /* * Wait for a request to complete, successfully or otherwise. */ -static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, - bool (*collector)(struct netfs_io_request *rreq)) +static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) { DEFINE_WAIT(myself); ssize_t ret; for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + if (!netfs_check_rreq_in_progress(rreq)) break; + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); finish_wait(&rreq->waitq, &myself); ret = rreq->error; 
@@ -478,12 +488,12 @@ all_collected: ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_read_collection); + return netfs_wait_for_in_progress(rreq, netfs_read_collection); } ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_write_collection); + return netfs_wait_for_in_progress(rreq, netfs_write_collection); } /* @@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, { DEFINE_WAIT(myself); - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + if (!netfs_check_rreq_in_progress(rreq) || !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) break; schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); finish_wait(&rreq->waitq, &myself); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 96ee18af28ef..3e804da1e1eb 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -218,7 +218,7 @@ reassess: stream->collected_to = front->start; } - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) + if (netfs_check_subreq_in_progress(front)) notes |= HIT_PENDING; smp_rmb(); /* Read counters after IN_PROGRESS flag. 
*/ transferred = READ_ONCE(front->transferred); @@ -293,7 +293,9 @@ reassess: spin_lock(&rreq->lock); remove = front; - trace_netfs_sreq(front, netfs_sreq_trace_discard); + trace_netfs_sreq(front, + notes & ABANDON_SREQ ? + netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); @@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_read_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. */ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index e2b102ffb768..0f3a36852a4d 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -240,7 +240,7 @@ reassess_streams: } /* Stall if the front is still undergoing I/O. 
*/ - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { + if (netfs_check_subreq_in_progress(front)) { notes |= HIT_PENDING; break; } @@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if (wreq->cleanup) - wreq->cleanup(wreq); + if ((wreq->origin == NETFS_UNBUFFERED_WRITE || + wreq->origin == NETFS_DIO_WRITE) && + !wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { @@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq) if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; - if (wreq->iocb->ki_complete) + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); + } wreq->iocb = VFS_PTR_POISON; } @@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_write_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. 
*/ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9d1d8a8bab72..fc9c3e0d34d8 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; - subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + trace_netfs_sreq(subreq, netfs_sreq_trace_split); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df4807460596..4bea008dbebd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case 
NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: + goto out_retry; + case NFS4ERR_DELAY: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); fallthrough; - case -NFS4ERR_GRACE: + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC connection errors */ case -ENETDOWN: case -ENETUNREACH: @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. 
Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. 
*/ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1228,6 +1272,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, clp, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data, task->tk_status); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); 
trace_nfs4_pnfs_commit_ds(data, err); switch (err) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ab7868807a7..a2fa6bc4d74e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3adb7d0dbec7..1a7ec68bde15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ccb00aa93be0..e00b2aea8da2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback *cb, out: if (!rcl->__nr_referring_calls) { cb->cb_nr_referring_call_list--; + list_del(&rcl->__list); kfree(rcl); } } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3f3e9f6c4250..6a42cc7a845a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1611,7 +1611,7 @@ out_unlock: */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { - int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; + 
int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; @@ -1623,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) - count++; + nrpools++; } mutex_lock(&nfsd_mutex); - nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 1fc68b59fa5d..76d6248b625e 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1418,7 +1418,7 @@ out: bool ovl_lower_positive(struct dentry *dentry) { struct ovl_entry *poe = OVL_E(dentry->d_parent); - struct qstr *name = &dentry->d_name; + const struct qstr *name = &dentry->d_name; const struct cred *old_cred; unsigned int i; bool positive = false; @@ -1441,9 +1441,15 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). 
+ */ this = lookup_one_positive_unlocked( mnt_idmap(parentpath->layer->mnt), - name, parentpath->dentry); + &QSTR_LEN(name->name, name->len), + parentpath->dentry); if (IS_ERR(this)) { switch (PTR_ERR(this)) { case -ENOENT: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 239a9c3642c0..55806bd36faa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { - dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); - return dentry; + struct dentry *ret; + + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + return ret; } static inline int ovl_do_mknod(struct ovl_fs *ofs, diff --git a/fs/pidfs.c b/fs/pidfs.c index c1f0a067be40..69919be1c9d8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: diff --git a/fs/pnode.h b/fs/pnode.h index 34b6247af01d..2d026fb98b18 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -28,8 +28,6 @@ #define CL_SHARED_TO_SLAVE 0x20 #define CL_COPY_MNT_NS_FILE 0x40 -#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) - static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a3eb3b740f76..3604b616311c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode) head = ei->sysctl; if (head) { - RCU_INIT_POINTER(ei->sysctl, NULL); + WRITE_ONCE(ei->sysctl, NULL); 
proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index cc9d74a06ff0..08b78150cdde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry, struct ctl_table_header *head; struct inode *inode; - /* Although proc doesn't have negative dentries, rcu-walk means - * that inode here can be NULL */ - /* AV: can it, indeed? */ - inode = d_inode_rcu(dentry); - if (!inode) - return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; - head = rcu_dereference(PROC_I(inode)->sysctl); + + // false positive is fine here - we'll recheck anyway + if (d_in_lookup(dentry)) + return 0; + + inode = d_inode_rcu(dentry); + // we just might have run into dentry in the middle of __dentry_kill() + if (!inode) + return 1; + + head = READ_ONCE(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 27972c0749e7..751479eb128f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; - anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); + anon = get_mm_counter_sum(mm, MM_ANONPAGES); + file = get_mm_counter_sum(mm, MM_FILEPAGES); + shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; - swap = get_mm_counter(mm, MM_SWAPENTS); + swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); @@ 
-92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); + *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; - *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); + *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } @@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_FILE; } - if (is_zero_pfn(pmd_pfn(pmd))) + if (is_huge_zero_pmd(pmd)) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 6ed2dfd4dbbd..d98e0d2de09f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rmid_read rr = {0}; struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; + int domid, cpu, ret = 0; struct rdt_resource *r; + struct cacheinfo *ci; struct mon_data *md; - int domid, ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. 
*/ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151..0a1eedba2b03 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -98,7 +98,7 @@ struct mon_data { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -112,7 +112,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned int ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index bde2801289d3..f5637855c3ac 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. 
*/ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 1beb124e25f6..77d08229d855 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; list_for_each_entry(mevt, &r->evt_list, list) { - domid = do_sum ? d->ci->id : d->hdr.id; + domid = do_sum ? d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) return -EINVAL; @@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? 
d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5200a0f3cafc..368e870624da 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC); - if (tmp_list == NULL) - break; + if (tmp_list == NULL) { + /* + * If the malloc() fails, we won't drop all + * dentries, and unmounting is likely to trigger + * a 'Dentry still in use' error. + */ + cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n"); + spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cifs_sb->tlink_tree_lock); + goto done; + } spin_lock(&cfid->fid_lock); tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; @@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); +done: list_for_each_entry_safe(tmp_list, q, &entry, entry) { list_del(&tmp_list->entry); dput(tmp_list->dentry); diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index bc8a812ff95f..a28f7cae3caa 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -26,7 +26,7 @@ struct cached_dirents { * open file instance. 
*/ struct mutex de_mutex; - int pos; /* Expected ctx->pos */ + loff_t pos; /* Expected ctx->pos */ struct list_head entries; }; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c0196be0e65f..3fdf75737d43 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if ((count < 1) || (count > 11)) return -EINVAL; - memset(flags_string, 0, 12); + memset(flags_string, 0, sizeof(flags_string)); if (copy_from_user(flags_string, buffer, count)) return -EFAULT; diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 26327442e383..b51ce64fcccf 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -61,7 +61,7 @@ struct smb_query_info { struct smb3_key_debug_info { __u64 Suid; __u16 cipher_type; - __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 45e94e18f4d5..89160bc34d35 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count) struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; + struct list_head rlist; /* reconnect list */ spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ @@ -776,6 +777,7 @@ struct TCP_Server_Info { __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ + unsigned long neg_start; /* when negotiate started (jiffies) */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define 
CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ @@ -1302,6 +1304,7 @@ struct cifs_tcon { bool use_persistent:1; /* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ + bool dummy:1; /* dummy tcon used for reconnecting channels */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 66093fa78aed..045227ed4efc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); +void smb2_query_server_interfaces(struct work_struct *work); void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 7216fcec79e8..75142f49d65d 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: + __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) /* reset bytes number since we can not check a sign */ @@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + 
break; } if (rdata->result == -ENODATA) { @@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c4fb80b37738..205f547ca49e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) return rc; } -static void smb2_query_server_interfaces(struct work_struct *work) +void smb2_query_server_interfaces(struct work_struct *work) { int rc; int xid; @@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work) (SMB_INTERFACE_POLL_INTERVAL * HZ)); } +#define set_need_reco(server) \ +do { \ + spin_lock(&server->srv_lock); \ + if (server->tcpStatus != CifsExiting) \ + server->tcpStatus = CifsNeedReconnect; \ + spin_unlock(&server->srv_lock); \ +} while (0) + /* * Update the tcpStatus for the server. * This is used to signal the cifsd thread to call cifs_reconnect @@ -137,39 +145,45 @@ void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels) { - struct TCP_Server_Info *pserver; + struct TCP_Server_Info *nserver; struct cifs_ses *ses; + LIST_HEAD(reco); int i; - /* If server is a channel, select the primary channel */ - pserver = SERVER_IS_CHAN(server) ? 
server->primary_server : server; - /* if we need to signal just this channel */ if (!all_channels) { - spin_lock(&server->srv_lock); - if (server->tcpStatus != CifsExiting) - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&server->srv_lock); + set_need_reco(server); return; } - spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { - if (cifs_ses_exiting(ses)) - continue; - spin_lock(&ses->chan_lock); - for (i = 0; i < ses->chan_count; i++) { - if (!ses->chans[i].server) + if (SERVER_IS_CHAN(server)) + server = server->primary_server; + scoped_guard(spinlock, &cifs_tcp_ses_lock) { + set_need_reco(server); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); continue; - - spin_lock(&ses->chans[i].server->srv_lock); - if (ses->chans[i].server->tcpStatus != CifsExiting) - ses->chans[i].server->tcpStatus = CifsNeedReconnect; - spin_unlock(&ses->chans[i].server->srv_lock); + } + spin_lock(&ses->chan_lock); + for (i = 1; i < ses->chan_count; i++) { + nserver = ses->chans[i].server; + if (!nserver) + continue; + nserver->srv_count++; + list_add(&nserver->rlist, &reco); + } + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); } - spin_unlock(&ses->chan_lock); } - spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(server, nserver, &reco, rlist) { + list_del_init(&server->rlist); + set_need_reco(server); + cifs_put_tcp_session(server, 0); + } } /* @@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server) /* * If we're in the process of mounting a share or reconnecting a session * and the server abruptly shut down (e.g. socket wasn't closed, packet - * had been ACK'ed but no SMB response), don't wait longer than 20s to - * negotiate protocol. + * had been ACK'ed but no SMB response), don't wait longer than 20s from + * when negotiate actually started. 
*/ spin_lock(&server->srv_lock); if (server->tcpStatus == CifsInNegotiate && - time_after(jiffies, server->lstrp + 20 * HZ)) { + time_after(jiffies, server->neg_start + 20 * HZ)) { spin_unlock(&server->srv_lock); cifs_reconnect(server, false); return true; @@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->max_cached_dirs = ctx->max_cached_dirs; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; - INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - INIT_DELAYED_WORK(&tcon->query_interfaces, - smb2_query_server_interfaces); if (ses->server->dialect >= SMB30_PROT_ID && (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { /* schedule query interfaces poll */ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); } -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh); -#endif spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -4199,7 +4207,9 @@ retry: return 0; } + server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; + server->neg_start = jiffies; spin_unlock(&server->srv_lock); rc = server->ops->negotiate(xid, ses, server); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9835672267d2..e9212da32f01 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr]; struct TCP_Server_Info *server; struct cifsFileInfo *open_file = req->cfile; + struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb); size_t wsize = req->rreq.wsize; int rc; @@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); wdata->server = server; + if (cifs_sb->ctx->wsize == 0) + 
cifs_negotiate_wsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + retry: if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, false); @@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) { + if (cifs_sb->ctx->rsize == 0) cifs_negotiate_rsize(server, cifs_sb->ctx, tlink_tcon(req->cfile->tlink)); - } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index a634a34d4086..59ccc2229ab3 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "symlinkroot mount options must be absolute path\n"); goto cifs_parse_mount_err; } - kfree(ctx->symlinkroot); - ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL); - if (!ctx->symlinkroot) + if (strnlen(param->string, PATH_MAX) == PATH_MAX) { + cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n", + PATH_MAX - 1); goto cifs_parse_mount_err; + } + kfree(ctx->symlinkroot); + ctx->symlinkroot = param->string; + param->string = NULL; break; } /* case Opt_ignore: - is ignored as expected ... */ @@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } - /* - * By default resolve all native absolute symlinks relative to "/mnt/". - * Same default has drvfs driver running in WSL for resolving SMB shares. 
- */ - if (!ctx->symlinkroot) - ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL); - return 0; cifs_parse_mount_err: diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 56439da4f119..0a9935ce05a5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) le16_to_cpu(tcon->ses->server->cipher_type); pkey_inf.Suid = tcon->ses->Suid; memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, - 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + SMB2_NTLMV2_SESSKEY_SIZE); memcpy(pkey_inf.smb3decryptionkey, tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); memcpy(pkey_inf.smb3encryptionkey, diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index e77017f47084..da23cc12a52c 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) #ifdef CONFIG_CIFS_DFS_UPCALL INIT_LIST_HEAD(&ret_buf->dfs_ses_list); #endif + INIT_LIST_HEAD(&ret_buf->pending_opens); + INIT_DELAYED_WORK(&ret_buf->query_interfaces, + smb2_query_server_interfaces); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh); +#endif return ret_buf; } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index ba0193cf9033..4e5460206397 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, /* The Mode field in the response can now include the file type as well */ fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), fattr->cf_cifsattrs & ATTR_DIRECTORY); - fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); + fattr->cf_dtype = S_DT(fattr->cf_mode); switch (fattr->cf_mode & S_IFMT) { case S_IFLNK: diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index bb25e77c5540..5fa29a97ac15 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ 
-57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, struct reparse_symlink_data_buffer *buf = NULL; struct cifs_open_info_data data = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + const char *symroot = cifs_sb->ctx->symlinkroot; struct inode *new; struct kvec iov; __le16 *path = NULL; @@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * ensure compatibility of this symlink stored in absolute form * on the SMB server. */ - if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) { + if (!strstarts(symname, symroot)) { /* * If the absolute Linux symlink target path is not * inside "symlinkroot" location then there is no way @@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, cifs_dbg(VFS, "absolute symlink '%s' cannot be converted to NT format " "because it is outside of symlinkroot='%s'\n", - symname, cifs_sb->ctx->symlinkroot); + symname, symroot); rc = -EINVAL; goto out; } - len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[len-1] != '/') + len = strlen(symroot); + if (symroot[len - 1] != '/') len++; if (symname[len] >= 'a' && symname[len] <= 'z' && (symname[len+1] == '/' || symname[len+1] == '\0')) { @@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, const char *full_path, struct cifs_sb_info *cifs_sb) { + const char *symroot = cifs_sb->ctx->symlinkroot; char sep = CIFS_DIR_SEP(cifs_sb); char *linux_target = NULL; char 
*smb_target = NULL; @@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && !relative) { /* * This is an absolute symlink from the server which does not * support POSIX paths, so the symlink is in NT-style path. @@ -875,15 +879,8 @@ globalroot: abs_path += sizeof("\\DosDevices\\")-1; else if (strstarts(abs_path, "\\GLOBAL??\\")) abs_path += sizeof("\\GLOBAL??\\")-1; - else { - /* Unhandled absolute symlink, points outside of DOS/Win32 */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; - } + else + goto out_unhandled_target; /* Sometimes path separator after \?? is double backslash */ if (abs_path[0] == '\\') @@ -910,25 +907,19 @@ globalroot: abs_path++; abs_path[0] = drive_letter; } else { - /* Unhandled absolute symlink. Report an error. 
*/ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; + goto out_unhandled_target; } abs_path_len = strlen(abs_path)+1; - symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/') + symlinkroot_len = strlen(symroot); + if (symroot[symlinkroot_len - 1] == '/') symlinkroot_len--; linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL); if (!linux_target) { rc = -ENOMEM; goto out; } - memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len); + memcpy(linux_target, symroot, symlinkroot_len); linux_target[symlinkroot_len] = '/'; memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len); } else if (smb_target[0] == sep && relative) { @@ -966,6 +957,7 @@ globalroot: * These paths have same format as Linux symlinks, so no * conversion is needed. */ +out_unhandled_target: linux_target = smb_target; smb_target = NULL; } @@ -1172,7 +1164,6 @@ out: if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK)) return false; - fattr->cf_dtype = S_DT(fattr->cf_mode); return true; } diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ec0db32c7d98..330bc3d25bad 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, ctx->domainauto = ses->domainAuto; ctx->domainname = ses->domainName; - /* no hostname for extra channels */ - ctx->server_hostname = ""; + ctx->server_hostname = ses->server->hostname; ctx->username = ses->user_name; ctx->password = ses->password; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a717be1626a3..2df93a75e3b8 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -424,9 +424,9 @@ skip_sess_setup: free_xid(xid); ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - /* regardless of rc value, setup polling */ - 
queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (!tcon->ipc && !tcon->dummy) + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); mutex_unlock(&ses->session_mutex); @@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work) } goto done; } - tcon->status = TID_GOOD; - tcon->retry = false; - tcon->need_reconnect = false; + tcon->dummy = true; /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } #ifdef CONFIG_CIFS_SMB_DIRECT /* @@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; result = smb2_check_receive(mid, server, 0); - if (result != 0) + if (result != 0) { + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad); break; + } written = 
le32_to_cpu(rsp->DataLength); /* @@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } @@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5ae847919da5..754e94a0e07f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -907,8 +907,10 @@ wait_send_queue: .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(*_remaining_data_length, + sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, + rc = smb_extract_iter_to_rdma(iter, payload_len, &extract); if (rc < 0) goto err_dma; @@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_iter(info, NULL, &remaining_data_length); } +static int smbd_post_send_full_iter(struct smbd_connection *info, + struct iov_iter *iter, + int *_remaining_data_length) +{ + 
int rc = 0; + + /* + * smbd_post_send_iter() respects the + * negotiated max_send_size, so we need to + * loop until the full iter is posted + */ + + while (iov_iter_count(iter) > 0) { + rc = smbd_post_send_iter(info, iter, _remaining_data_length); + if (rc < 0) + break; + } + + return rc; +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) char name[MAX_NAME_LEN]; int rc; + if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) + return -ENOMEM; + scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); info->request_cache = kmem_cache_create( @@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) goto out1; scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + + struct kmem_cache_args response_args = { + .align = __alignof__(struct smbd_response), + .useroffset = (offsetof(struct smbd_response, packet) + + sizeof(struct smbdirect_data_transfer)), + .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), + }; info->response_cache = - kmem_cache_create( - name, - sizeof(struct smbd_response) + - sp->max_recv_size, - 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create(name, + sizeof(struct smbd_response) + sp->max_recv_size, + &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1747,35 +1778,39 @@ try_again: } /* - * Receive data from receive reassembly queue + * Receive data from the transport's receive reassembly queue * All the incoming data packets are placed in reassembly queue - * buf: the buffer to read data into + * iter: the buffer to read data into * size: the length of data to read * return value: actual data read - * Note: this implementation copies the data from reassebmly queue to receive + * + * Note: this implementation copies the data from reassembly queue to receive * buffers used by 
upper layer. This is not the optimal code path. A better way * to do it is to not have upper layer allocate its receive buffers but rather * borrow the buffer from reassembly queue, and return it after data is * consumed. But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. */ -static int smbd_recv_buf(struct smbd_connection *info, char *buf, - unsigned int size) +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + again: /* * No need to hold the reassembly queue lock all the time as we are * the only one reading from the front of the queue. 
The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, info->reassembly_data_length); if (info->reassembly_data_length >= size) { int queue_length; @@ -1813,7 +1848,10 @@ again: if (response->first_segment && size == 4) { unsigned int rfc1002_len = data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; data_read = 4; response->first_segment = false; log_read(INFO, "returning rfc1002 length %d\n", @@ -1822,10 +1860,9 @@ again: } to_copy = min_t(int, data_length - offset, to_read); - memcpy( - buf + data_read, - (char *)data_transfer + data_offset + offset, - to_copy); + if (copy_to_iter((char *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; /* move on to the next buffer? 
*/ if (to_copy == data_length - offset) { @@ -1891,90 +1928,6 @@ read_rfc1002_done: } /* - * Receive a page from receive reassembly queue - * page: the page to read data into - * to_read: the length of data to read - * return value: actual data read - */ -static int smbd_recv_page(struct smbd_connection *info, - struct page *page, unsigned int page_offset, - unsigned int to_read) -{ - struct smbdirect_socket *sc = &info->socket; - int ret; - char *to_address; - void *page_address; - - /* make sure we have the page ready for read */ - ret = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= to_read || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (ret) - return ret; - - /* now we can read from reassembly queue and not sleep */ - page_address = kmap_atomic(page); - to_address = (char *) page_address + page_offset; - - log_read(INFO, "reading from page=%p address=%p to_read=%d\n", - page, to_address, to_read); - - ret = smbd_recv_buf(info, to_address, to_read); - kunmap_atomic(page_address); - - return ret; -} - -/* - * Receive data from transport - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC - * return: total bytes read, or 0. SMB Direct will not do partial read. 
- */ -int smbd_recv(struct smbd_connection *info, struct msghdr *msg) -{ - char *buf; - struct page *page; - unsigned int to_read, page_offset; - int rc; - - if (iov_iter_rw(&msg->msg_iter) == WRITE) { - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg iter dir %u\n", - iov_iter_rw(&msg->msg_iter)); - rc = -EINVAL; - goto out; - } - - switch (iov_iter_type(&msg->msg_iter)) { - case ITER_KVEC: - buf = msg->msg_iter.kvec->iov_base; - to_read = msg->msg_iter.kvec->iov_len; - rc = smbd_recv_buf(info, buf, to_read); - break; - - case ITER_BVEC: - page = msg->msg_iter.bvec->bv_page; - page_offset = msg->msg_iter.bvec->bv_offset; - to_read = msg->msg_iter.bvec->bv_len; - rc = smbd_recv_page(info, page, page_offset, to_read); - break; - - default: - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg type %d\n", - iov_iter_type(&msg->msg_iter)); - rc = -EINVAL; - } - -out: - /* SMBDirect will read it all or nothing */ - if (rc > 0) - msg->msg_iter.count = 0; - return rc; -} - -/* * Send data to transport * Each rqst is transported as a SMBDirect payload * rqst: the data to write @@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_iter(info, &rqst->rq_iter, - &remaining_data_length); + rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + &remaining_data_length); if (rc < 0) break; } @@ -2589,13 +2542,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, size_t fsize = folioq_folio_size(folioq, slot); if (offset < fsize) { - size_t part = umin(maxsize - ret, fsize - offset); + size_t part = umin(maxsize, fsize - offset); if 
(!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) return -EIO; offset += part; ret += part; + maxsize -= part; } if (offset >= fsize) { @@ -2610,7 +2564,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, slot = 0; } } - } while (rdma->nr_sge < rdma->max_sge || maxsize > 0); + } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); iter->folioq = folioq; iter->folioq_slot = slot; diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 52bcb55d9952..93e5b2bb9f28 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->rreq_debug_id, __entry->rreq_debug_index, __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) @@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) ) @@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) ) @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class, 
__entry->target_offset = target_offset; __entry->len = len; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) ) @@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class, __entry->tid = tid; __entry->sesid = sesid; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx", __entry->xid, __entry->sesid, __entry->tid, __entry->fid) ) @@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class, __entry->sesid = sesid; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->rc) ) @@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class, __entry->status = status; __entry->rc = rc; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid, __entry->status, __entry->rc) ) @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class, __entry->cmd = cmd; __entry->mid = mid; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid) ) @@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class, __entry->when_sent = when_sent; __entry->when_received = when_received; ), - TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", + TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent, __entry->when_received) ) @@ 
-898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class, __assign_str(func_name); __entry->rc = rc; ), - TP_printk("\t%s: xid=%u rc=%d", + TP_printk("%s: xid=%u rc=%d", __get_str(func_name), __entry->xid, __entry->rc) ) @@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class, __entry->ino = ino; __entry->rc = rc; ), - TP_printk("\tino=%lu rc=%d", + TP_printk("ino=%lu rc=%d", __entry->ino, __entry->rc) ) @@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class, __entry->xid = xid; __assign_str(func_name); ), - TP_printk("\t%s: xid=%u", + TP_printk("%s: xid=%u", __get_str(func_name), __entry->xid) ) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 83764c230e9d..3f04a2977ba8 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) kvfree(conn->request_buf); kfree(conn->preauth_info); if (atomic_dec_and_test(&conn->refcnt)) { - ksmbd_free_transport(conn->transport); + conn->transport->ops->free_transport(conn->transport); kfree(conn); } } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 6efed923bd68..dd3e0e3f7bf0 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,6 +133,7 @@ struct ksmbd_transport_ops { void *buf, unsigned int len, struct smb2_buffer_desc_v1 *desc, unsigned int desc_len); + void (*free_transport)(struct ksmbd_transport *kt); }; struct ksmbd_transport { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 1a308171b599..63d17cea2e95 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work, out_len = work->response_sz - (le16_to_cpu(rsp->SecurityBufferOffset) + 4); - /* Check previous session */ - prev_sess_id = le64_to_cpu(req->PreviousSessionId); - if (prev_sess_id && prev_sess_id != sess->id) - destroy_previous_session(conn, sess->user, prev_sess_id); - retval = 
ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { ksmbd_debug(SMB, "krb5 authentication failed\n"); return -EINVAL; } + + /* Check previous session */ + prev_sess_id = le64_to_cpu(req->PreviousSessionId); + if (prev_sess_id && prev_sess_id != sess->id) + destroy_previous_session(conn, sess->user, prev_sess_id); + rsp->SecurityBufferLength = cpu_to_le16(out_len); if ((conn->sign || server_conf.enforced_signing) || @@ -4871,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); - sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); - sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); + sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + sinfo->AllocationSize = cpu_to_le64(fp->stream.size); + sinfo->EndOfFile = cpu_to_le64(fp->stream.size); + } sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); sinfo->DeletePending = delete_pending; sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0; @@ -4935,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; file_info->Pad1 = 0; - file_info->AllocationSize = - cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = + cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 
0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); file_info->DeletePending = delete_pending; @@ -4946,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->IndexNumber = cpu_to_le64(stat.ino); file_info->EASize = 0; file_info->AccessFlags = fp->daccess; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, @@ -5134,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, time = ksmbd_UnixTimeToNT(stat.ctime); file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 
0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ntwrk_info)); @@ -5158,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp, struct smb2_file_pos_info *file_info; file_info = (struct smb2_file_pos_info *)rsp->Buffer; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); + rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_pos_info)); } @@ -5247,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, file_info->ChangeTime = cpu_to_le64(time); file_info->DosAttributes = fp->f_ci->m_fattr; file_info->Inode = cpu_to_le64(stat.ino); - file_info->EndOfFile = cpu_to_le64(stat.size); - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + if (ksmbd_stream_fd(fp) == false) { + file_info->EndOfFile = cpu_to_le64(stat.size); + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + } else { + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + } file_info->HardLinks = cpu_to_le32(stat.nlink); file_info->Mode = cpu_to_le32(stat.mode & 0777); switch (stat.mode & S_IFMT) { @@ -6190,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work, if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + if (ksmbd_stream_fd(fp) == true) + return 0; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (rc) @@ -6248,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, * truncate of some filesystem like FAT32 fill zero data in * truncated range. 
*/ - if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { + if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC && + ksmbd_stream_fd(fp) == false) { ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize); rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { @@ -6321,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp, return -EINVAL; } - fp->filp->f_pos = current_byte_offset; + if (ksmbd_stream_fd(fp) == false) + fp->filp->f_pos = current_byte_offset; + else { + if (current_byte_offset > XATTR_SIZE_MAX) + current_byte_offset = XATTR_SIZE_MAX; + fp->stream.pos = current_byte_offset; + } return 0; } @@ -8535,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); - rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; rsp->Reserved = 0; @@ -8547,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) rsp->VolatileFid = volatile_id; rsp->PersistentFid = persistent_id; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); ksmbd_fd_put(work, fp); - smb2_set_err_rsp(work); } static int check_lease_state(struct lease *lease, __le32 req_state) @@ -8686,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) } lease_state = lease->state; - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - atomic_dec(&opinfo->breaking_cnt); - wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; @@ -8699,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) rsp->LeaseState = lease_state; rsp->LeaseDuration = 0; ret = 
ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); - smb2_set_err_rsp(work); } /** diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4998df04ab95..c6cbe0d56e32 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -159,7 +159,8 @@ struct smb_direct_transport { }; #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) - +#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ + struct smb_direct_transport, transport)) enum { SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, SMB_DIRECT_MSG_DATA_TRANSFER @@ -410,6 +411,11 @@ err: return NULL; } +static void smb_direct_free_transport(struct ksmbd_transport *kt) +{ + kfree(SMBD_TRANS(kt)); +} + static void free_transport(struct smb_direct_transport *t) { struct smb_direct_recvmsg *recvmsg; @@ -427,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t) if (t->qp) { ib_drain_qp(t->qp); ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); - ib_destroy_qp(t->qp); + t->qp = NULL; + rdma_destroy_qp(t->cm_id); } ksmbd_debug(RDMA, "drain the reassembly queue\n"); @@ -455,7 +462,6 @@ static void free_transport(struct smb_direct_transport *t) smb_direct_destroy_pools(t); ksmbd_conn_free(KSMBD_TRANS(t)->conn); - kfree(t); } static struct smb_direct_sendmsg @@ -1935,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, return 0; err: if (t->qp) { - ib_destroy_qp(t->qp); t->qp = NULL; + rdma_destroy_qp(t->cm_id); } if (t->recv_cq) { ib_destroy_cq(t->recv_cq); @@ -2281,4 +2287,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .read = smb_direct_read, .rdma_read = smb_direct_rdma_read, .rdma_write = smb_direct_rdma_write, 
+ .free_transport = smb_direct_free_transport, }; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index abedf510899a..4e9f98db9ff4 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -void ksmbd_free_transport(struct ksmbd_transport *kt) +static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt) { struct tcp_transport *t = TCP_TRANS(kt); @@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, + .free_transport = ksmbd_tcp_free_transport, }; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index ba45e809555a..d3437f6644e3 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos, if (v_len - *pos < count) count = v_len - *pos; + fp->stream.pos = v_len; memcpy(buf, &stream_buf[*pos], count); @@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, true); if (err < 0) goto out; - - fp->filp->f_pos = *pos; + else + fp->stream.pos = size; err = 0; out: kvfree(stream_buf); @@ -1281,6 +1282,7 @@ out1: err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { + mnt_drop_write(parent_path->mnt); path_put(path); path_put(parent_path); } diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5bbb179736c2..0708155b5caf 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -44,6 +44,7 @@ struct ksmbd_lock { struct stream { char *name; ssize_t size; + loff_t pos; }; struct ksmbd_inode { diff --git a/fs/super.c b/fs/super.c index 21799e213fd7..80418ca8e215 100644 --- a/fs/super.c +++ b/fs/super.c @@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); locked 
= super_lock_shared(sb); - if (locked) + if (locked) { f(sb, arg); + super_unlock_shared(sb); + } spin_lock(&sb_lock); if (p) diff --git a/fs/xattr.c b/fs/xattr.c index 8ec5b0204bfd..600ae97969cf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, buffer += err; } remaining_size -= err; + err = 0; read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 7839efe050bf..000cc7f4a3ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3444,16 +3444,41 @@ xfs_alloc_read_agf( set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } + #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); - ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); + /* + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. 
+ */ + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); + xfs_trans_brelse(tp, agfbp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } } -#endif +#endif /* DEBUG */ + if (agfbpp) *agfbpp = agfbp; else diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0c47b5c6ca7d..750111634d9f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi( set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } +#ifdef DEBUG /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. 
*/ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag_mount(pag))); + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + xfs_trans_brelse(tp, agibp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } + } +#endif /* DEBUG */ + if (agibpp) *agibpp = agibp; else diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8af83bd161f9..ba5bd6031ece 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit( return error; } -/* - * Push a single buffer on a delwri queue. - * - * The purpose of this function is to submit a single buffer of a delwri queue - * and return with the buffer still on the original queue. - * - * The buffer locking and queue management logic between _delwri_pushbuf() and - * _delwri_queue() guarantee that the buffer cannot be queued to another list - * before returning. - */ -int -xfs_buf_delwri_pushbuf( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - int error; - - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); - - xfs_buf_lock(bp); - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); - bp->b_flags |= XBF_WRITE; - xfs_buf_submit(bp); - - /* - * The buffer is now locked, under I/O but still on the original delwri - * queue. Wait for I/O completion, restore the DELWRI_Q flag and - * return with the buffer unlocked and still on the original queue. 
- */ - error = xfs_buf_iowait(bp); - bp->b_flags |= _XBF_DELWRI_Q; - xfs_buf_unlock(bp); - - return error; -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9d2ab567cf81..15fc56948346 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); -extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 90139e0f3271..7fc54725c5f6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } +static void +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->__bli_format; + return; + } + + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), + GFP_KERNEL | __GFP_NOFAIL); +} + +static void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->__bli_format) { + kfree(bip->bli_formats); + bip->bli_formats = NULL; + } +} + +static void +xfs_buf_item_free( + struct xfs_buf_log_item *bip) +{ + xfs_buf_item_free_format(bip); + kvfree(bip->bli_item.li_lv_shadow); + kmem_cache_free(xfs_buf_item_cache, bip); +} + +/* + * xfs_buf_item_relse() is called when the buf log item is no longer needed. 
+ */ +static void +xfs_buf_item_relse( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + + trace_xfs_buf_item_relse(bp, _RET_IP_); + + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ASSERT(atomic_read(&bip->bli_refcount) == 0); + + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +} + /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -390,6 +445,42 @@ xfs_buf_item_pin( } /* + * For a stale BLI, process all the necessary completions that must be + * performed when the final BLI reference goes away. The buffer will be + * referenced and locked here - we return to the caller with the buffer still + * referenced and locked for them to finalise processing of the buffer. + */ +static void +xfs_buf_item_finish_stale( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_item *lip = &bip->bli_item; + + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); + + if (bip->bli_flags & XFS_BLI_STALE_INODE) { + xfs_buf_item_done(bp); + xfs_buf_inode_iodone(bp); + ASSERT(list_empty(&bp->b_li_list)); + return; + } + + /* + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do + * the right thing regardless of the situation in which we are called. + */ + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip); + ASSERT(bp->b_log_item == NULL); +} + +/* * This is called to unpin the buffer associated with the buf log item which was * previously pinned with a call to xfs_buf_item_pin(). We enter this function * with a buffer pin count, a buffer reference and a BLI reference. 
@@ -438,13 +529,6 @@ xfs_buf_item_unpin( } if (stale) { - ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_flags & XBF_STALE); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(list_empty(&lip->li_trans)); - ASSERT(!bp->b_transp); - trace_xfs_buf_item_unpin_stale(bip); /* @@ -455,22 +539,7 @@ xfs_buf_item_unpin( * processing is complete. */ xfs_buf_rele(bp); - - /* - * If we get called here because of an IO error, we may or may - * not have the item on the AIL. xfs_trans_ail_delete() will - * take care of that situation. xfs_trans_ail_delete() drops - * the AIL lock. - */ - if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_item_done(bp); - xfs_buf_inode_iodone(bp); - ASSERT(list_empty(&bp->b_li_list)); - } else { - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - } + xfs_buf_item_finish_stale(bip); xfs_buf_relse(bp); return; } @@ -543,43 +612,42 @@ xfs_buf_item_push( * Drop the buffer log item refcount and take appropriate action. This helper * determines whether the bli must be freed or not, since a decrement to zero * does not necessarily mean the bli is unused. - * - * Return true if the bli is freed, false otherwise. */ -bool +void xfs_buf_item_put( struct xfs_buf_log_item *bip) { - struct xfs_log_item *lip = &bip->bli_item; - bool aborted; - bool dirty; + + ASSERT(xfs_buf_islocked(bip->bli_buf)); /* drop the bli ref and return if it wasn't the last one */ if (!atomic_dec_and_test(&bip->bli_refcount)) - return false; + return; - /* - * We dropped the last ref and must free the item if clean or aborted. - * If the bli is dirty and non-aborted, the buffer was clean in the - * transaction but still awaiting writeback from previous changes. In - * that case, the bli is freed on buffer writeback completion. 
- */ - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xlog_is_shutdown(lip->li_log); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - if (dirty && !aborted) - return false; + /* If the BLI is in the AIL, then it is still dirty and in use */ + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); + return; + } /* - * The bli is aborted or clean. An aborted item may be in the AIL - * regardless of dirty state. For example, consider an aborted - * transaction that invalidated a dirty bli and cleared the dirty - * state. + * In shutdown conditions, we can be asked to free a dirty BLI that + * isn't in the AIL. This can occur due to a checkpoint aborting a BLI + * instead of inserting it into the AIL at checkpoint IO completion. If + * there's another bli reference (e.g. a btree cursor holds a clean + * reference) and it is released via xfs_trans_brelse(), we can get here + * with that aborted, dirty BLI. In this case, it is safe to free the + * dirty BLI immediately, as it is not in the AIL and there are no + * other references to it. + * + * We should never get here with a stale BLI via that path as + * xfs_trans_brelse() specifically holds onto stale buffers rather than + * releasing them. */ - if (aborted) - xfs_trans_ail_delete(lip, 0); - xfs_buf_item_relse(bip->bli_buf); - return true; + ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) || + test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags)); + ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); + xfs_buf_item_relse(bip); } /* @@ -600,6 +668,15 @@ xfs_buf_item_put( * if necessary but do not unlock the buffer. This is for support of * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't * free the item. + * + * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must* + * perform a completion abort of any objects attached to the buffer for IO + * tracking purposes. 
This generally only happens in shutdown situations, + * normally xfs_buf_item_unpin() will drop the last BLI reference and perform + * completion processing. However, because transaction completion can race with + * checkpoint completion during a shutdown, this release context may end up + * being the last active reference to the BLI and so needs to perform this + * cleanup. */ STATIC void xfs_buf_item_release( @@ -607,18 +684,19 @@ xfs_buf_item_release( { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; - bool released; bool hold = bip->bli_flags & XFS_BLI_HOLD; bool stale = bip->bli_flags & XFS_BLI_STALE; -#if defined(DEBUG) || defined(XFS_WARN) - bool ordered = bip->bli_flags & XFS_BLI_ORDERED; - bool dirty = bip->bli_flags & XFS_BLI_DIRTY; bool aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; +#if defined(DEBUG) || defined(XFS_WARN) + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; #endif trace_xfs_buf_item_release(bip); + ASSERT(xfs_buf_islocked(bp)); + /* * The bli dirty state should match whether the blf has logged segments * except for ordered buffers, where only the bli should be dirty. @@ -634,16 +712,56 @@ xfs_buf_item_release( bp->b_transp = NULL; bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); + /* If there are other references, then we have nothing to do. */ + if (!atomic_dec_and_test(&bip->bli_refcount)) + goto out_release; + + /* + * Stale buffer completion frees the BLI, unlocks and releases the + * buffer. Neither the BLI or buffer are safe to reference after this + * call, so there's nothing more we need to do here. + * + * If we get here with a stale buffer and references to the BLI remain, + * we must not unlock the buffer as the last BLI reference owns lock + * context, not us. 
+ */ + if (stale) { + xfs_buf_item_finish_stale(bip); + xfs_buf_relse(bp); + ASSERT(!hold); + return; + } + /* - * Unref the item and unlock the buffer unless held or stale. Stale - * buffers remain locked until final unpin unless the bli is freed by - * the unref call. The latter implies shutdown because buffer - * invalidation dirties the bli and transaction. + * Dirty or clean, aborted items are done and need to be removed from + * the AIL and released. This frees the BLI, but leaves the buffer + * locked and referenced. */ - released = xfs_buf_item_put(bip); - if (hold || (stale && !released)) + if (aborted || xlog_is_shutdown(lip->li_log)) { + ASSERT(list_empty(&bip->bli_buf->b_li_list)); + xfs_buf_item_done(bp); + goto out_release; + } + + /* + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer + * locked and referenced. + * + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback. + */ + if (!dirty) + xfs_buf_item_relse(bip); + else + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); + + /* Not safe to reference the BLI from here */ +out_release: + /* + * If we get here with a stale buffer, we must not unlock the + * buffer as the last BLI reference owns lock context, not us. 
+ */ + if (stale || hold) return; - ASSERT(!stale || aborted); xfs_buf_relse(bp); } @@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC void -xfs_buf_item_get_format( - struct xfs_buf_log_item *bip, - int count) -{ - ASSERT(bip->bli_formats == NULL); - bip->bli_format_count = count; - - if (count == 1) { - bip->bli_formats = &bip->__bli_format; - return; - } - - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), - GFP_KERNEL | __GFP_NOFAIL); -} - -STATIC void -xfs_buf_item_free_format( - struct xfs_buf_log_item *bip) -{ - if (bip->bli_formats != &bip->__bli_format) { - kfree(bip->bli_formats); - bip->bli_formats = NULL; - } -} - /* * Allocate a new buf log item to go with the given buffer. * Set the buffer's b_log_item field to point to the new @@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format( return false; } -STATIC void -xfs_buf_item_free( - struct xfs_buf_log_item *bip) -{ - xfs_buf_item_free_format(bip); - kvfree(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_cache, bip); -} - -/* - * xfs_buf_item_relse() is called when the buf log item is no longer needed. - */ -void -xfs_buf_item_relse( - struct xfs_buf *bp) -{ - struct xfs_buf_log_item *bip = bp->b_log_item; - - trace_xfs_buf_item_relse(bp, _RET_IP_); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - - if (atomic_read(&bip->bli_refcount)) - return; - bp->b_log_item = NULL; - xfs_buf_rele(bp); - xfs_buf_item_free(bip); -} - void xfs_buf_item_done( struct xfs_buf *bp) @@ -1023,5 +1086,5 @@ xfs_buf_item_done( xfs_trans_ail_delete(&bp->b_log_item->bli_item, (bp->b_flags & _XBF_LOGRECOVERY) ? 
0 : SHUTDOWN_CORRUPT_INCORE); - xfs_buf_item_relse(bp); + xfs_buf_item_relse(bp->b_log_item); } diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index e10e324cd245..416890b84f8c 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -49,8 +49,7 @@ struct xfs_buf_log_item { int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); void xfs_buf_item_done(struct xfs_buf *bp); -void xfs_buf_item_relse(struct xfs_buf *); -bool xfs_buf_item_put(struct xfs_buf_log_item *); +void xfs_buf_item_put(struct xfs_buf_log_item *bip); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_inode_iodone(struct xfs_buf *); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4e32f0860b7..0bd8022e47b4 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1398,11 +1398,9 @@ xfs_qm_dqflush( ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + ASSERT(atomic_read(&dqp->q_pincount) == 0); trace_xfs_dqflush(dqp); - - xfs_qm_dqunpin_wait(dqp); - fa = xfs_qm_dqflush_check(dqp); if (fa) { xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 48254a72071b..0b41b18debf3 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range( } #define XFS_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) + (FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \ + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) STATIC long __xfs_file_fallocate( diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 726e29b837e6..bbc2f2973dcc 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -979,7 +979,15 @@ xfs_reclaim_inode( */ if 
(xlog_is_shutdown(ip->i_mount->m_log)) { xfs_iunpin_wait(ip); + /* + * Avoid a ABBA deadlock on the inode cluster buffer vs + * concurrent xfs_ifree_cluster() trying to mark the inode + * stale. We don't need the inode locked to run the flush abort + * code, but the flush abort needs to lock the cluster buffer. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iflush_shutdown_abort(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); goto reclaim; } if (xfs_ipincount(ip)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ee3e0f284287..761a996a857c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1635,7 +1635,7 @@ retry: iip = ip->i_itemp; if (__xfs_iflags_test(ip, XFS_IFLUSHING)) { ASSERT(!list_empty(&iip->ili_item.li_bio_list)); - ASSERT(iip->ili_last_fields); + ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log)); goto out_iunlock; } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c6cb0b6b9e46..285e27ff89e2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -758,11 +758,14 @@ xfs_inode_item_push( * completed and items removed from the AIL before the next push * attempt. 
*/ + trace_xfs_inode_push_stale(ip, _RET_IP_); return XFS_ITEM_PINNED; } - if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) + if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) { + trace_xfs_inode_push_pinned(ip, _RET_IP_); return XFS_ITEM_PINNED; + } if (xfs_iflags_test(ip, XFS_IFLUSHING)) return XFS_ITEM_FLUSHING; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index f66d2d430e4f..a80cb6b9969a 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -793,8 +793,10 @@ xlog_cil_ail_insert( struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; - if (aborted) + if (aborted) { + trace_xlog_ail_insert_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { lip->li_ops->iop_release(lip); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 08443ceec329..866c71d9fbae 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -320,7 +320,7 @@ xfs_mru_cache_create( xfs_mru_cache_free_func_t free_func) { struct xfs_mru_cache *mru = NULL; - int err = 0, grp; + int grp; unsigned int grp_time; if (mrup) @@ -341,8 +341,8 @@ xfs_mru_cache_create( mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists), GFP_KERNEL | __GFP_NOFAIL); if (!mru->lists) { - err = -ENOMEM; - goto exit; + kfree(mru); + return -ENOMEM; } for (grp = 0; grp < mru->grp_count; grp++) @@ -361,14 +361,7 @@ xfs_mru_cache_create( mru->free_func = free_func; mru->data = data; *mrup = mru; - -exit: - if (err && mru && mru->lists) - kfree(mru->lists); - if (err && mru) - kfree(mru); - - return err; + return 0; } /* @@ -425,10 +418,6 @@ xfs_mru_cache_insert( { int error = -EINVAL; - ASSERT(mru && mru->lists); - if (!mru || !mru->lists) - goto out_free; - error = -ENOMEM; if (radix_tree_preload(GFP_KERNEL)) goto out_free; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 417439b58785..fa135ac26471 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,6 +134,7 @@ xfs_qm_dqpurge( dqp->q_flags |= 
XFS_DQFLAG_FREEING; + xfs_qm_dqunpin_wait(dqp); xfs_dqflock(dqp); /* @@ -465,6 +466,7 @@ xfs_qm_dquot_isolate( struct xfs_dquot *dqp = container_of(item, struct xfs_dquot, q_lru); struct xfs_qm_isolate *isol = arg; + enum lru_status ret = LRU_SKIP; if (!xfs_dqlock_nowait(dqp)) goto out_miss_busy; @@ -478,6 +480,16 @@ xfs_qm_dquot_isolate( goto out_miss_unlock; /* + * If the dquot is pinned or dirty, rotate it to the end of the LRU to + * give some time for it to be cleaned before we try to isolate it + * again. + */ + ret = LRU_ROTATE; + if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) { + goto out_miss_unlock; + } + + /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ @@ -492,41 +504,14 @@ xfs_qm_dquot_isolate( } /* - * If the dquot is dirty, flush it. If it's already being flushed, just - * skip it so there is time for the IO to complete before we try to - * reclaim it again on the next LRU pass. + * The dquot may still be under IO, in which case the flush lock will be + * held. If we can't get the flush lock now, just skip over the dquot as + * if it was dirty. */ if (!xfs_dqflock_nowait(dqp)) goto out_miss_unlock; - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* we have to drop the LRU lock to flush the dquot */ - spin_unlock(&lru->lock); - - error = xfs_dquot_use_attached_buf(dqp, &bp); - if (!bp || error == -EAGAIN) { - xfs_dqfunlock(dqp); - goto out_unlock_dirty; - } - - /* - * dqflush completes dqflock on error, and the delwri ioend - * does it on success. 
- */ - error = xfs_qm_dqflush(dqp, bp); - if (error) - goto out_unlock_dirty; - - xfs_buf_delwri_queue(bp, &isol->buffers); - xfs_buf_relse(bp); - goto out_unlock_dirty; - } - + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); @@ -548,13 +533,7 @@ out_miss_unlock: out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - return LRU_SKIP; - -out_unlock_dirty: - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - xfs_dqunlock(dqp); - return LRU_RETRY; + return ret; } static unsigned long @@ -1486,7 +1465,6 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1497,34 +1475,8 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - /* - * The only way the dquot is already flush locked by the time quotacheck - * gets here is if reclaim flushed it before the dqadjust walk dirtied - * it for the final time. Quotacheck collects all dquot bufs in the - * local delwri queue before dquots are dirtied, so reclaim can't have - * possibly queued it for I/O. The only way out is to push the buffer to - * cycle the flush lock. 
- */ - if (!xfs_dqflock_nowait(dqp)) { - /* buf is pinned in-core by delwri list */ - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - goto out_unlock; - - if (!(bp->b_flags & _XBF_DELWRI_Q)) { - error = -EAGAIN; - xfs_buf_relse(bp); - goto out_unlock; - } - xfs_buf_unlock(bp); - - xfs_buf_delwri_pushbuf(bp, buffer_list); - xfs_buf_rele(bp); - - error = -EAGAIN; - goto out_unlock; - } + xfs_qm_dqunpin_wait(dqp); + xfs_dqflock(dqp); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6484c596ecea..736eb0924573 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom( kfree(nmp); + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); + if (min_logfsbs > mp->m_sb.sb_logblocks) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0bc4b5489078..bb0a82635a77 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2020,14 +2020,13 @@ xfs_remount_rw( int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { + xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } - if (mp->m_rtdev_targp && - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 01d284a1c759..ba45d801df1c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); 
DEFINE_BUF_EVENT(xfs_buf_iodone_async); @@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) + __field(unsigned long, iflags) __field(unsigned long, caller_ip) ), TP_fast_assign( @@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); + __entry->iflags = ip->i_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, + __entry->iflags, (char *)__entry->caller_ip) ) @@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_inode_push_pinned); +DEFINE_IREF_EVENT(xfs_inode_push_stale); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), @@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); +DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); +DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c6657072361a..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -742,8 +742,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } diff 
--git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 80add26c0111..01315ed75502 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -727,7 +727,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -777,26 +777,6 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -static void -xfs_submit_zoned_bio( - struct iomap_ioend *ioend, - struct xfs_open_zone *oz, - bool is_seq) -{ - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; - ioend->io_private = oz; - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ - - if (is_seq) { - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; - } else { - xfs_mark_rtg_boundary(ioend); - } - - submit_bio(&ioend->io_bio); -} - /* * Cache the last zone written to for an inode so that it is considered first * for subsequent writes. 
@@ -891,6 +871,26 @@ xfs_zone_cache_create_association( xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); } +static void +xfs_submit_zoned_bio( + struct iomap_ioend *ioend, + struct xfs_open_zone *oz, + bool is_seq) +{ + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; + ioend->io_private = oz; + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ + + if (is_seq) { + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; + } else { + xfs_mark_rtg_boundary(ioend); + } + + submit_bio(&ioend->io_bio); +} + void xfs_zone_alloc_and_submit( struct iomap_ioend *ioend, diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 6f6b9de12cd3..db294d452e8c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -202,6 +202,8 @@ struct shash_desc { #define HASH_REQUEST_CLONE(name, gfp) \ hash_request_clone(name, sizeof(__##name##_req), gfp) +#define CRYPTO_HASH_STATESIZE(coresize, blocksize) (coresize + blocksize + 1) + /** * struct shash_alg - synchronous message digest definition * @init: see struct ahash_alg diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h index b9bccd3ff57f..21a27fd5e198 100644 --- a/include/crypto/internal/sha2.h +++ b/include/crypto/internal/sha2.h @@ -25,7 +25,7 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS], void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks); -static inline void sha256_choose_blocks( +static __always_inline void sha256_choose_blocks( u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks, bool force_generic, bool force_simd) { diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index 7e7f1ac3b7fd..9e338e7aafbd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has 
been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. + * self-tests, in order to test the no-SIMD fallback code. This override is + * currently limited to configurations where the "full" self-tests are enabled, + * because it might be a bit too invasive to be part of the "fast" self-tests. */ -#ifdef CONFIG_CRYPTO_SELFTESTS +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) diff --git a/include/crypto/md5.h b/include/crypto/md5.h index 198b5d69b92f..28ee533a0507 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -2,6 +2,7 @@ #ifndef _CRYPTO_MD5_H #define _CRYPTO_MD5_H +#include <crypto/hash.h> #include <linux/types.h> #define MD5_DIGEST_SIZE 16 @@ -15,6 +16,9 @@ #define MD5_H2 0x98badcfeUL #define MD5_H3 0x10325476UL +#define CRYPTO_MD5_STATESIZE \ + CRYPTO_HASH_STATESIZE(MD5_STATE_SIZE, MD5_HMAC_BLOCK_SIZE) + extern const u8 md5_zero_message_hash[MD5_DIGEST_SIZE]; struct md5_state { diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index 5c3b2aa3e69d..d344d41e6cfe 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -300,6 +300,9 @@ struct drm_file { * * Mapping of mm object handles to object pointers. Used by the GEM * subsystem. Protected by @table_lock. + * + * Note that allocated entries might be NULL as a transient state when + * creating or deleting a handle. 
*/ struct idr object_idr; diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h index 668077009fce..38b24fc8978d 100644 --- a/include/drm/drm_framebuffer.h +++ b/include/drm/drm_framebuffer.h @@ -23,6 +23,7 @@ #ifndef __DRM_FRAMEBUFFER_H__ #define __DRM_FRAMEBUFFER_H__ +#include <linux/bits.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/sched.h> @@ -100,6 +101,8 @@ struct drm_framebuffer_funcs { unsigned num_clips); }; +#define DRM_FRAMEBUFFER_HAS_HANDLE_REF(_i) BIT(0u + (_i)) + /** * struct drm_framebuffer - frame buffer object * @@ -189,6 +192,10 @@ struct drm_framebuffer { */ int flags; /** + * @internal_flags: Framebuffer flags like DRM_FRAMEBUFFER_HAS_HANDLE_REF. + */ + unsigned int internal_flags; + /** * @filp_head: Placed on &drm_file.fbs, protected by &drm_file.fbs_lock. */ struct list_head filp_head; diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b37860f4a895..6d2c08e81101 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -223,6 +223,9 @@ struct mipi_dsi_multi_context { #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) +extern const struct bus_type mipi_dsi_bus_type; +#define dev_is_mipi_dsi(dev) ((dev)->bus == &mipi_dsi_bus_type) + /** * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any * given pixel format defined by the MIPI DSI diff --git a/include/drm/spsc_queue.h b/include/drm/spsc_queue.h index 125f096c88cb..ee9df8cc67b7 100644 --- a/include/drm/spsc_queue.h +++ b/include/drm/spsc_queue.h @@ -70,9 +70,11 @@ static inline bool spsc_queue_push(struct spsc_queue *queue, struct spsc_node *n preempt_disable(); + atomic_inc(&queue->job_count); + smp_mb__after_atomic(); + tail = (struct spsc_node **)atomic_long_xchg(&queue->tail, (long)&node->next); WRITE_ONCE(*tail, node); - atomic_inc(&queue->job_count); /* * In case of first element verify new node will be visible to the consumer diff --git 
a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5bded24dc24f..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -283,6 +283,7 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; uuid_t uuid; }; diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a59880c809c7..181a0deadc9e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -269,11 +269,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) * however we constrain this to what we can validate and test. 
*/ #define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 96a3a0d6a60e..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -82,6 +82,7 @@ extern ssize_t cpu_show_old_microcode(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/execmem.h b/include/linux/execmem.h index ca42d5e46ccc..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -54,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); - -/* - * Called from mark_readonly(), where the system transitions to ROX. 
- */ -void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } -static inline void execmem_cache_make_ro(void) { } #endif /** diff --git a/include/linux/fs.h b/include/linux/fs.h index 4ec77da65f14..040c0036320f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; @@ -3606,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; diff --git a/include/linux/futex.h b/include/linux/futex.h index 005b040c4791..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -89,6 +89,7 @@ void futex_hash_free(struct mm_struct *mm); static inline void futex_mm_init(struct mm_struct *mm) { RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; mutex_init(&mm->futex_hash_lock); } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce377f7fb912..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -663,18 +663,6 @@ static inline bool ieee80211_s1g_has_cssid(__le16 fc) } /** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon - * @fc: 
frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set - */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) -{ - return ieee80211_is_s1g_beacon(fc) && - (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); -} - -/** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame @@ -1278,7 +1266,7 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; } u; } __packed __aligned(2); @@ -1536,7 +1524,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1721,35 +1709,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; @@ -4901,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: 
whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, the S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon.
+ */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 2922635986f5..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -698,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h index dd8d1d138544..224ac28e88d7 100644 --- a/include/linux/irqchip/irq-msi-lib.h +++ b/include/linux/irqchip/irq-msi-lib.h @@ -17,6 +17,7 @@ #define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) +struct msi_domain_info; int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git 
a/include/linux/libata.h b/include/linux/libata.h index 31be45fd47a6..1e5aec839041 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ef2ba0c667a..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2568,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,11 +586,6 @@ struct module { atomic_t refcnt; #endif -#ifdef CONFIG_MITIGATION_ITS - int its_num_pages; - void **its_page_array; -#endif - #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. 
*/ ctor_fn_t *ctors; diff --git a/include/linux/mount.h b/include/linux/mount.h index 4880f434c021..1a508beba446 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -116,10 +116,8 @@ extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(const struct path *); -extern void drop_collected_mounts(struct vfsmount *); -extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, - struct vfsmount *); +extern struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h index e8462deda6db..f0aa098a395f 100644 --- a/include/linux/mtd/nand-qpic-common.h +++ b/include/linux/mtd/nand-qpic-common.h @@ -237,6 +237,9 @@ * @last_data_desc - last DMA desc in data channel (tx/rx). * @last_cmd_desc - last DMA desc in command channel. * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array * @bam_ce_pos - the index in bam_ce which is available for next sgl * @bam_ce_start - the index in bam_ce which marks the start position ce * for current sgl. 
It will be used for size calculation @@ -255,6 +258,11 @@ struct bam_transaction { struct dma_async_tx_descriptor *last_data_desc; struct dma_async_tx_descriptor *last_cmd_desc; struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + struct_group(bam_positions, u32 bam_ce_pos; u32 bam_ce_start; diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5daf80df9e89..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length, struct mtd_info **part); + long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 811a0f356315..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,11 +113,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ @@ -151,11 +152,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) 
\ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 065c17385e53..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -265,21 +265,20 @@ struct netfs_io_request { bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ -#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define 
NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ -#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 52dc7cfab0e0..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -635,8 +635,46 @@ struct perf_addr_filter_range { unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means the task that the event was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING tearing down state and freeing bits.
+ * */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -5, diff --git a/arch/x86/include/asm/amd/fch.h b/include/linux/platform_data/x86/amd-fch.h index 2cf5153edbc2..2cf5153edbc2 100644 --- a/arch/x86/include/asm/amd/fch.h +++ b/include/linux/platform_data/x86/amd-fch.h diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 0b3a36bdaa90..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9ba771f2ddea..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -159,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -170,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 
nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 4789f91dae94..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include <uapi/linux/spi/spi.h> /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) diff --git a/include/linux/usb.h b/include/linux/usb.h index 1b2545b4363b..92c752f5446f 
100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -614,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -724,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index d56e6e135158..d40e978126e3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -243,8 +243,8 @@ int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -#ifdef CONFIG_BPF_SYSCALL extern struct proto vsock_proto; +#ifdef CONFIG_BPF_SYSCALL int vsock_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void __init 
vsock_bpf_build_proto(void); #else diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a760f05fa3fb..0da011fc8146 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -29,6 +29,7 @@ #include <linux/idr.h> #include <linux/leds.h> #include <linux/rculist.h> +#include <linux/srcu.h> #include <net/bluetooth/hci.h> #include <net/bluetooth/hci_drv.h> @@ -347,6 +348,7 @@ struct adv_monitor { struct hci_dev { struct list_head list; + struct srcu_struct srcu; struct mutex lock; struct ida unset_handle_ida; @@ -1348,8 +1350,7 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || - c->state != state) + if (c->type != BIS_LINK || c->state != state) continue; if (handle == c->iso_qos.bcast.big) { diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d711642e78b5..c003cd194fa2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -370,7 +370,7 @@ static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb) static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto) { - if (!pskb_may_pull(skb, PPPOE_SES_HLEN)) + if (!pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) return false; *inner_proto = __nf_flow_pppoe_proto(skb); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index d7b7b6cd4aa1..8a75c73fc555 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -114,7 +114,6 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct netlink_ext_ack *extack); void qdisc_put_rtab(struct qdisc_rate_table *tab); void qdisc_put_stab(struct qdisc_size_table *tab); -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc); bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct 
netdev_queue *txq, spinlock_t *root_lock, bool validate); @@ -290,4 +289,28 @@ static inline bool tc_qdisc_stats_dump(struct Qdisc *sch, return true; } +static inline void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} + +static inline unsigned int qdisc_peek_len(struct Qdisc *sch) +{ + struct sk_buff *skb; + unsigned int len; + + skb = sch->ops->peek(sch); + if (unlikely(skb == NULL)) { + qdisc_warn_nonwc("qdisc_peek_len", sch); + return 0; + } + len = qdisc_pkt_len(skb); + + return len; +} + #endif diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index a5f4b9234f46..dad7360f42f9 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_I(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - #endif /* _TRACE_EROFS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 333d2e38dd2c..73e96ccbe830 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -50,12 +50,14 @@ #define netfs_rreq_traces \ EM(netfs_rreq_trace_assess, "ASSESS ") \ - EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_collect, "COLLECT") \ EM(netfs_rreq_trace_complete, "COMPLET") \ + EM(netfs_rreq_trace_copy, "COPY ") \ EM(netfs_rreq_trace_dirty, "DIRTY ") \ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ + 
EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \ @@ -63,13 +65,15 @@ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ + EM(netfs_rreq_trace_unpause, "UNPAUSE") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ - EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ - EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \ + EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \ + EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \ + EM(netfs_rreq_trace_waited_ip, "DONE-IP") \ + EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \ + EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \ EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \ - EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \ - EM(netfs_rreq_trace_unpause, "UNPAUSE") \ E_(netfs_rreq_trace_write_done, "WR-DONE") #define netfs_sreq_sources \ @@ -82,6 +86,7 @@ E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ + EM(netfs_sreq_trace_abandoned, "ABNDN") \ EM(netfs_sreq_trace_add_donations, "+DON ") \ EM(netfs_sreq_trace_added, "ADD ") \ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \ @@ -89,6 +94,7 @@ EM(netfs_sreq_trace_cache_write, "CA-WR") \ EM(netfs_sreq_trace_cancel, "CANCL") \ EM(netfs_sreq_trace_clear, "CLEAR") \ + EM(netfs_sreq_trace_consumed, "CONSM") \ EM(netfs_sreq_trace_discard, "DSCRD") \ EM(netfs_sreq_trace_donate_to_prev, "DON-P") \ EM(netfs_sreq_trace_donate_to_next, "DON-N") \ @@ -96,7 +102,12 @@ EM(netfs_sreq_trace_fail, "FAIL ") \ EM(netfs_sreq_trace_free, "FREE ") \ EM(netfs_sreq_trace_hit_eof, "EOF ") \ - EM(netfs_sreq_trace_io_progress, "IO ") \ + EM(netfs_sreq_trace_io_bad, "I-BAD") \ + EM(netfs_sreq_trace_io_malformed, "I-MLF") \ + EM(netfs_sreq_trace_io_unknown, "I-UNK") \ + EM(netfs_sreq_trace_io_progress, "I-OK ") \ + EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \ + 
EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ @@ -142,8 +153,8 @@ #define netfs_sreq_ref_traces \ EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \ - EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \ - EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \ + EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \ + EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \ EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \ EM(netfs_sreq_trace_new, "NEW ") \ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \ @@ -366,7 +377,7 @@ TRACE_EVENT(netfs_sreq, __entry->slot = sreq->io_iter.folioq_slot; ), - TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d", + TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->what, netfs_sreq_traces), diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9ff72cfb2e98..09a75bdb6560 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,10 +208,6 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -/* generic netlink info */ -#define ETHTOOL_GENL_NAME "ethtool" -#define ETHTOOL_GENL_VERSION 1 - #define ETHTOOL_MCGRP_MONITOR_NAME "monitor" #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..aa8ab5227c1e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -6,8 +6,8 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -#define ETHTOOL_FAMILY_NAME "ethtool" -#define ETHTOOL_FAMILY_VERSION 1 +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 enum { 
ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..7a4c35ff03fe 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index 84fa8a21dfd0..6ac84b2f636c 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -27,14 +27,14 @@ * token, rem_id. * @MPTCP_EVENT_SUB_ESTABLISHED: A new subflow has been established. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. + * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_CLOSED: A subflow has been closed. An error (copy of * sk_err) could be set if an error has been detected for this subflow. * Attributes: token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | - * daddr6, sport, dport, backup, if_idx [, error]. + * daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_SUB_PRIORITY: The priority of a subflow has changed. 'error' * should not be set. Attributes: token, family, loc_id, rem_id, saddr4 | - * saddr6, daddr4 | daddr6, sport, dport, backup, if_idx [, error]. 
+ * saddr6, daddr4 | daddr6, sport, dport, backup, if-idx [, error]. * @MPTCP_EVENT_LISTENER_CREATED: A new PM listener is created. Attributes: * family, sport, saddr4 | saddr6. * @MPTCP_EVENT_LISTENER_CLOSED: A PM listener is closed. Attributes: family, diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 77d9d6af46da..c9751bdfd937 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -135,8 +135,28 @@ #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) /* - * zero copy requires 4k block size, and can remap ublk driver's io - * request into ublksrv's vm space + * ublk server can register data buffers for incoming I/O requests with a sparse + * io_uring buffer table. The request buffer can then be used as the data buffer + * for io_uring operations via the fixed buffer index. + * Note that the ublk server can never directly access the request data memory. + * + * To use this feature, the ublk server must first register a sparse buffer + * table on an io_uring instance. + * When an incoming ublk request is received, the ublk server submits a + * UBLK_U_IO_REGISTER_IO_BUF command to that io_uring instance. The + * ublksrv_io_cmd's q_id and tag specify the request whose buffer to register + * and addr is the index in the io_uring's buffer table to install the buffer. + * SQEs can now be submitted to the io_uring to read/write the request's buffer + * by enabling fixed buffers (e.g. using IORING_OP_{READ,WRITE}_FIXED or + * IORING_URING_CMD_FIXED) and passing the registered buffer index in buf_index. + * Once the last io_uring operation using the request's buffer has completed, + * the ublk server submits a UBLK_U_IO_UNREGISTER_IO_BUF command with q_id, tag, + * and addr again specifying the request buffer to unregister. + * The ublk request is completed when its buffer is unregistered from all + * io_uring instances and the ublk server issues UBLK_U_IO_COMMIT_AND_FETCH_REQ. 
+ * + * Not available for UBLK_F_UNPRIVILEGED_DEV, as a ublk server can leak + * uninitialized kernel memory by not reading into the full request buffer. */ #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) @@ -450,10 +470,10 @@ static inline struct ublk_auto_buf_reg ublk_sqe_addr_to_auto_buf_reg( __u64 sqe_addr) { struct ublk_auto_buf_reg reg = { - .index = sqe_addr & 0xffff, - .flags = (sqe_addr >> 16) & 0xff, - .reserved0 = (sqe_addr >> 24) & 0xff, - .reserved1 = sqe_addr >> 32, + .index = (__u16)sqe_addr, + .flags = (__u8)(sqe_addr >> 16), + .reserved0 = (__u8)(sqe_addr >> 24), + .reserved1 = (__u32)(sqe_addr >> 32), }; return reg; diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h index ed07181d4eff..e05280e41522 100644 --- a/include/uapi/linux/vm_sockets.h +++ b/include/uapi/linux/vm_sockets.h @@ -17,6 +17,10 @@ #ifndef _UAPI_VM_SOCKETS_H #define _UAPI_VM_SOCKETS_H +#ifndef __KERNEL__ +#include <sys/socket.h> /* for struct sockaddr and sa_family_t */ +#endif + #include <linux/socket.h> #include <linux/types.h> diff --git a/init/Kconfig b/init/Kconfig index af4c2f085455..666783eb50ab 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1716,9 +1716,13 @@ config FUTEX_PI depends on FUTEX && RT_MUTEXES default y +# +# marked broken for performance reasons; gives us one more cycle to sort things out. 
+# config FUTEX_PRIVATE_HASH bool depends on FUTEX && !BASE_SMALL && MMU + depends on BROKEN default y config FUTEX_MPOL diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index cd1fcb115739..be91edf34f01 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1259,8 +1259,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) + if (ret) { + put_task_struct(wq->task); goto err; + } return wq; err: diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index d59c12277d58..66c1ca73f55e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -98,8 +98,6 @@ struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *coun struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count); void tctx_task_work(struct callback_head *cb); __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); -int io_uring_alloc_task_context(struct task_struct *task, - struct io_ring_ctx *ctx); int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file, int start, int end); diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index ce95e3af44a9..f2d2cc319faa 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -271,6 +271,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, if (len > arg->max_len) { len = arg->max_len; if (!(bl->flags & IOBL_INC)) { + arg->partial_map = 1; if (iov != arg->iovs) break; buf->len = len; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 5d83c7adc739..723d0361898e 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -58,7 +58,8 @@ struct buf_sel_arg { size_t max_len; unsigned short nr_iovs; unsigned short mode; - unsigned buf_group; + unsigned short buf_group; + unsigned short partial_map; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, 
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 71400d6cefc8..4c2578f2efcb 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -82,7 +82,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw) spin_unlock(&ctx->msg_lock); } if (req) - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); percpu_ref_put(&ctx->refs); } @@ -90,7 +90,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req, int res, u32 cflags, u64 user_data) { if (!READ_ONCE(ctx->submitter_task)) { - kmem_cache_free(req_cachep, req); + kfree_rcu(req, rcu_head); return -EOWNERDEAD; } req->opcode = IORING_OP_NOP; diff --git a/io_uring/net.c b/io_uring/net.c index e16633fd6630..43a43522f406 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -75,12 +75,17 @@ struct io_sr_msg { u16 flags; /* initialised and used only by !msg send variants */ u16 buf_group; - bool retry; + unsigned short retry_flags; void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; }; +enum sr_retry_flags { + IO_SR_MSG_RETRY = 1, + IO_SR_MSG_PARTIAL_MAP = 2, +}; + /* * Number of times we'll try and do receives if there's more data. 
If we * exceed this limit, then add us to the back of the queue and retry from @@ -187,7 +192,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req, req->flags &= ~REQ_F_BL_EMPTY; sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = 0; /* get from the provided buffer */ } @@ -397,7 +402,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; sr->len = READ_ONCE(sqe->len); sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~SENDMSG_FLAGS) @@ -751,7 +756,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; - sr->retry = false; + sr->retry_flags = 0; if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; @@ -821,9 +826,9 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, if (sr->flags & IORING_RECVSEND_BUNDLE) { size_t this_ret = *ret - sr->done_io; - cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret), + cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); - if (sr->retry) + if (sr->retry_flags & IO_SR_MSG_RETRY) cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) @@ -832,12 +837,12 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, * If more is available AND it was a full transfer, retry and * append to this one */ - if (!sr->retry && kmsg->msg.msg_inq > 1 && this_ret > 0 && + if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 && !iov_iter_count(&kmsg->msg.msg_iter)) { req->cqe.flags = cflags & ~CQE_F_MASK; sr->len = kmsg->msg.msg_inq; sr->done_io += this_ret; - sr->retry = true; + sr->retry_flags |= IO_SR_MSG_RETRY; return false; } } else { @@ -1077,6 +1082,14 @@ static int io_recv_buf_select(struct 
io_kiocb *req, struct io_async_msghdr *kmsg if (unlikely(ret < 0)) return ret; + if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { + kmsg->vec.nr = ret; + kmsg->vec.iovec = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; + } + if (arg.partial_map) + sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP; + /* special case 1 vec, can be a fast path */ if (ret == 1) { sr->buf = arg.iovs[0].iov_base; @@ -1085,11 +1098,6 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg } iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret, arg.out_len); - if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) { - kmsg->vec.nr = ret; - kmsg->vec.iovec = arg.iovs; - req->flags |= REQ_F_NEED_CLEANUP; - } } else { void __user *buf; @@ -1275,7 +1283,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int ret; zc->done_io = 0; - zc->retry = false; + zc->retry_flags = 0; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) return -EINVAL; diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 6e0882b051f9..6de6229207a8 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -216,6 +216,7 @@ const struct io_issue_def io_issue_defs[] = { }, [IORING_OP_FALLOCATE] = { .needs_file = 1, + .hash_reg_file = 1, .prep = io_fallocate_prep, .issue = io_fallocate, }, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index c592ceace97d..f2b31fb68992 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -112,8 +112,11 @@ static void io_release_ubuf(void *priv) struct io_mapped_ubuf *imu = priv; unsigned int i; - for (i = 0; i < imu->nr_bvecs; i++) - unpin_user_page(imu->bvec[i].bv_page); + for (i = 0; i < imu->nr_bvecs; i++) { + struct folio *folio = page_folio(imu->bvec[i].bv_page); + + unpin_user_folio(folio, 1); + } } static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx, @@ -731,6 +734,7 @@ bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, data->nr_pages_mid = folio_nr_pages(folio); 
data->folio_shift = folio_shift(folio); + data->first_folio_page_idx = folio_page_idx(folio, page_array[0]); /* * Check if pages are contiguous inside a folio, and all folios have @@ -809,10 +813,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, imu->nr_bvecs = nr_pages; ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); - if (ret) { - unpin_user_pages(pages, nr_pages); + if (ret) goto done; - } size = iov->iov_len; /* store original address for later verification */ @@ -826,7 +828,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (coalesced) imu->folio_shift = data.folio_shift; refcount_set(&imu->refs, 1); - off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1); + + off = (unsigned long)iov->iov_base & ~PAGE_MASK; + if (coalesced) + off += data.first_folio_page_idx << PAGE_SHIFT; + node->buf = imu; ret = 0; @@ -842,6 +848,10 @@ done: if (ret) { if (imu) io_free_imu(ctx, imu); + if (pages) { + for (i = 0; i < nr_pages; i++) + unpin_user_folio(page_folio(pages[i]), 1); + } io_cache_free(&ctx->node_cache, node); node = ERR_PTR(ret); } @@ -1177,6 +1187,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx return -EINVAL; if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) return -EOVERFLOW; + if (nbufs > IORING_MAX_REG_BUFFERS) + return -EINVAL; ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); if (ret) @@ -1327,7 +1339,6 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, { unsigned long folio_size = 1 << imu->folio_shift; unsigned long folio_mask = folio_size - 1; - u64 folio_addr = imu->ubuf & ~folio_mask; struct bio_vec *res_bvec = vec->bvec; size_t total_len = 0; unsigned bvec_idx = 0; @@ -1349,8 +1360,13 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter, if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) return -EOVERFLOW; - /* by using folio address it also accounts for bvec 
offset */ - offset = buf_addr - folio_addr; + offset = buf_addr - imu->ubuf; + /* + * Only the first bvec can have non zero bv_offset, account it + * here and work with full folios below. + */ + offset += imu->bvec[0].bv_offset; + src_bvec = imu->bvec + (offset >> imu->folio_shift); offset &= folio_mask; diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 0d2138f16322..25e7e998dcfd 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -49,6 +49,7 @@ struct io_imu_folio_data { unsigned int nr_pages_mid; unsigned int folio_shift; unsigned int nr_folios; + unsigned long first_folio_page_idx; }; bool io_rsrc_cache_init(struct io_ring_ctx *ctx); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 268d2fbe6160..a3f11349ce06 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -16,6 +16,7 @@ #include <uapi/linux/io_uring.h> #include "io_uring.h" +#include "tctx.h" #include "napi.h" #include "sqpoll.h" @@ -419,7 +420,6 @@ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) __cold int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p) { - struct task_struct *task_to_put = NULL; int ret; /* Retain compatibility with failing for an invalid attach attempt */ @@ -498,7 +498,7 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, rcu_assign_pointer(sqd->thread, tsk); mutex_unlock(&sqd->lock); - task_to_put = get_task_struct(tsk); + get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); if (ret) @@ -513,8 +513,6 @@ err_sqpoll: complete(&ctx->sq_data->exited); err: io_sq_thread_finish(ctx); - if (task_to_put) - put_task_struct(task_to_put); return ret; } diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 797247a34cb7..00d0064b22a5 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -106,8 +106,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq, for_each_sgtable_dma_sg(mem->sgt, sg, i) total_size += sg_dma_len(sg); - if (total_size < off + len) - return -EINVAL; + if (total_size < off + len) { + ret = 
-EINVAL; + goto err; + } mem->dmabuf_offset = off; mem->size = len; @@ -861,10 +863,7 @@ static int io_pp_zc_init(struct page_pool *pp) static void io_pp_zc_destroy(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); - struct io_zcrx_area *area = ifq->area; - if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) - return; percpu_ref_put(&ifq->ctx->refs); } diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index e64ce21f9a80..2ee603a98813 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -134,6 +134,7 @@ config CRASH_DM_CRYPT depends on KEXEC_FILE depends on CRASH_DUMP depends on DM_CRYPT + depends on KEYS help With this option enabled, user space can intereact with /sys/kernel/config/crash_dm_crypt_keys to make the dm crypt keys diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index f2f38903b2fe..b0eae2a3c895 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -668,12 +668,6 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } -static int compare_root(struct vfsmount *mnt, void *arg) -{ - return inode_to_key(d_backing_inode(mnt->mnt_root)) == - (unsigned long)arg; -} - void audit_trim_trees(void) { struct list_head cursor; @@ -683,8 +677,9 @@ void audit_trim_trees(void) while (cursor.next != &tree_list) { struct audit_tree *tree; struct path path; - struct vfsmount *root_mnt; struct audit_node *node; + struct path *paths; + struct path array[16]; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -696,9 +691,9 @@ void audit_trim_trees(void) if (err) goto skip_it; - root_mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(root_mnt)) + if (IS_ERR(paths)) goto skip_it; spin_lock(&hash_lock); @@ -706,14 +701,17 @@ void audit_trim_trees(void) struct audit_chunk *chunk = find_chunk(node); /* this could be NULL if the watch is dying else where... 
*/ node->index |= 1U<<31; - if (iterate_mounts(compare_root, - (void *)(chunk->key), - root_mnt)) - node->index &= ~(1U<<31); + for (struct path *p = paths; p->dentry; p++) { + struct inode *inode = p->dentry->d_inode; + if (inode_to_key(inode) == chunk->key) { + node->index &= ~(1U<<31); + break; + } + } } spin_unlock(&hash_lock); trim_marked(tree); - drop_collected_mounts(root_mnt); + drop_collected_paths(paths, array); skip_it: put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -742,9 +740,14 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } -static int tag_mount(struct vfsmount *mnt, void *arg) +static int tag_mounts(struct path *paths, struct audit_tree *tree) { - return tag_chunk(d_backing_inode(mnt->mnt_root), arg); + for (struct path *p = paths; p->dentry; p++) { + int err = tag_chunk(p->dentry->d_inode, tree); + if (err) + return err; + } + return 0; } /* @@ -801,7 +804,8 @@ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; - struct vfsmount *mnt; + struct path array[16]; + struct path *paths; int err; rule->tree = NULL; @@ -828,16 +832,16 @@ int audit_add_tree_rule(struct audit_krule *rule) err = kern_path(tree->pathname, 0, &path); if (err) goto Err; - mnt = collect_mounts(&path); + paths = collect_paths(&path, array, 16); path_put(&path); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); + if (IS_ERR(paths)) { + err = PTR_ERR(paths); goto Err; } get_tree(tree); - err = iterate_mounts(tag_mount, tree, mnt); - drop_collected_mounts(mnt); + err = tag_mounts(paths, tree); + drop_collected_paths(paths, array); if (!err) { struct audit_node *node; @@ -872,20 +876,21 @@ int audit_tag_tree(char *old, char *new) struct list_head cursor, barrier; int failed = 0; struct path path1, path2; - struct vfsmount *tagged; + struct path array[16]; + struct path *paths; int err; err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path2); + paths = collect_paths(&path2, 
array, 16); path_put(&path2); - if (IS_ERR(tagged)) - return PTR_ERR(tagged); + if (IS_ERR(paths)) + return PTR_ERR(paths); err = kern_path(old, 0, &path1); if (err) { - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return err; } @@ -914,7 +919,7 @@ int audit_tag_tree(char *old, char *new) continue; } - failed = iterate_mounts(tag_mount, tree, tagged); + failed = tag_mounts(paths, tree); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -955,7 +960,7 @@ int audit_tag_tree(char *old, char *new) list_del(&cursor); mutex_unlock(&audit_filter_mutex); path_put(&path1); - drop_collected_mounts(tagged); + drop_collected_paths(paths, array); return failed; } diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 3dabdd137d10..2d6e1c98d8ad 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); - if (++nfree == LOCAL_FREE_TARGET) + if (++nfree == lru->target_free) break; } - if (nfree < LOCAL_FREE_TARGET) - __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, + if (nfree < lru->target_free) + __bpf_lru_list_shrink(lru, l, lru->target_free - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); @@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } + + lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2, + 1, LOCAL_FREE_TARGET); } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index cbd8d3720c2b..fe2661a58ea9 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -58,6 +58,7 @@ struct bpf_lru { del_from_htab_func del_from_htab; void *del_arg; unsigned int hash_offset; + unsigned int target_free; 
unsigned int nr_scans; bool percpu; }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9122c39870bf..f4885514f007 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -2134,7 +2134,7 @@ static const struct bpf_func_proto bpf_sysctl_get_name_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE, .arg4_type = ARG_ANYTHING, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a7d6e0c5928b..169845710c7e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7027,8 +7027,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) { struct inode *f_inode; }; -BTF_TYPE_SAFE_TRUSTED(struct dentry) { - /* no negative dentry-s in places where bpf can see it */ +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) { struct inode *d_inode; }; @@ -7066,7 +7065,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } @@ -7076,6 +7074,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, const char *field_name, u32 btf_id) { BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 039d1eb2f215..507b8f19a262 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -188,13 +188,12 @@ static void freezer_attach(struct cgroup_taskset *tset) if (!(freezer->state & CGROUP_FREEZING)) { __thaw_task(task); } else { - freeze_task(task); - 
/* clear FROZEN and propagate upwards */ while (freezer && (freezer->state & CGROUP_FROZEN)) { freezer->state &= ~CGROUP_FROZEN; freezer = parent_freezer(freezer); } + freeze_task(task); } } diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 8df0dfaaca18..67af8a55185d 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -222,7 +222,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) if (size_cmdline != -1) { selected_size = size_cmdline; selected_base = base_cmdline; - selected_limit = min_not_zero(limit_cmdline, limit); + + /* Hornor the user setup dma address limit */ + selected_limit = limit_cmdline ?: limit; + if (base_cmdline + size_cmdline == limit_cmdline) fixed = true; } else { diff --git a/kernel/events/core.c b/kernel/events/core.c index f34c99f8ce8f..22fdf0c187cd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -207,6 +207,19 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, __perf_ctx_unlock(&cpuctx->ctx); } +typedef struct { + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; +} class_perf_ctx_lock_t; + +static inline void class_perf_ctx_lock_destructor(class_perf_ctx_lock_t *_T) +{ perf_ctx_unlock(_T->cpuctx, _T->ctx); } + +static inline class_perf_ctx_lock_t +class_perf_ctx_lock_constructor(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ perf_ctx_lock(cpuctx, ctx); return (class_perf_ctx_lock_t){ cpuctx, ctx }; } + #define TASK_TOMBSTONE ((void *)-1L) static bool is_kernel_event(struct perf_event *event) @@ -938,13 +951,19 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == NULL) return; - WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); - cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; - perf_ctx_lock(cpuctx, cpuctx->task_ctx); + guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx); + /* + * Re-check, could've raced vs perf_remove_from_context(). 
+ */ + if (READ_ONCE(cpuctx->cgrp) == NULL) + return; + + WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); @@ -962,7 +981,6 @@ static void perf_cgroup_switch(struct task_struct *task) ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); perf_ctx_enable(&cpuctx->ctx, true); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } static int perf_cgroup_ensure_storage(struct perf_event *event, @@ -2120,18 +2138,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader == event) del_event_from_groups(event, ctx); - /* - * If event was in error state, then keep it - * that way, otherwise bogus counts will be - * returned on read(). The only way to get out - * of error state is by explicit re-enabling - * of the event - */ - if (event->state > PERF_EVENT_STATE_OFF) { - perf_cgroup_event_disable(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - } - ctx->generation++; event->pmu_ctx->nr_events--; } @@ -2149,8 +2155,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event) } static void put_event(struct perf_event *event); -static void event_sched_out(struct perf_event *event, - struct perf_event_context *ctx); +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state); static void perf_put_aux_event(struct perf_event *event) { @@ -2183,8 +2190,7 @@ static void perf_put_aux_event(struct perf_event *event) * state so that we don't try to schedule it again. Note * that perf_event_enable() will clear the ERROR status. 
*/ - event_sched_out(iter, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + __event_disable(iter, ctx, PERF_EVENT_STATE_ERROR); } } @@ -2242,18 +2248,6 @@ static inline struct list_head *get_event_list(struct perf_event *event) &event->pmu_ctx->flexible_active; } -/* - * Events that have PERF_EV_CAP_SIBLING require being part of a group and - * cannot exist on their own, schedule them out and move them into the ERROR - * state. Also see _perf_event_enable(), it will not be able to recover - * this ERROR state. - */ -static inline void perf_remove_sibling_event(struct perf_event *event) -{ - event_sched_out(event, event->ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); -} - static void perf_group_detach(struct perf_event *event) { struct perf_event *leader = event->group_leader; @@ -2289,8 +2283,15 @@ static void perf_group_detach(struct perf_event *event) */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { + /* + * Events that have PERF_EV_CAP_SIBLING require being part of + * a group and cannot exist on their own, schedule them out + * and move them into the ERROR state. Also see + * _perf_event_enable(), it will not be able to recover this + * ERROR state. 
+ */ if (sibling->event_caps & PERF_EV_CAP_SIBLING) - perf_remove_sibling_event(sibling); + __event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR); sibling->group_leader = sibling; list_del_init(&sibling->sibling_list); @@ -2493,11 +2494,14 @@ __perf_remove_from_context(struct perf_event *event, state = PERF_EVENT_STATE_EXIT; if (flags & DETACH_REVOKE) state = PERF_EVENT_STATE_REVOKED; - if (flags & DETACH_DEAD) { - event->pending_disable = 1; + if (flags & DETACH_DEAD) state = PERF_EVENT_STATE_DEAD; - } + event_sched_out(event, ctx); + + if (event->state > PERF_EVENT_STATE_OFF) + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, min(event->state, state)); if (flags & DETACH_GROUP) @@ -2562,6 +2566,15 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla event_function_call(event, __perf_remove_from_context, (void *)flags); } +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state) +{ + event_sched_out(event, ctx); + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, state); +} + /* * Cross CPU call to disable a performance event */ @@ -2576,13 +2589,18 @@ static void __perf_event_disable(struct perf_event *event, perf_pmu_disable(event->pmu_ctx->pmu); ctx_time_update_event(ctx, event); + /* + * When disabling a group leader, the whole group becomes ineligible + * to run, so schedule out the full group. + */ if (event == event->group_leader) group_sched_out(event, ctx); - else - event_sched_out(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - perf_cgroup_event_disable(event, ctx); + /* + * But only mark the leader OFF; the siblings will remain + * INACTIVE. 
+ */ + __event_disable(event, ctx, PERF_EVENT_STATE_OFF); perf_pmu_enable(event->pmu_ctx->pmu); } @@ -2656,8 +2674,8 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) static void perf_event_throttle(struct perf_event *event) { - event->pmu->stop(event, 0); event->hw.interrupts = MAX_INTERRUPTS; + event->pmu->stop(event, 0); if (event == event->group_leader) perf_log_throttle(event, 0); } @@ -7186,18 +7204,18 @@ void perf_event_wakeup(struct perf_event *event) static void perf_sigtrap(struct perf_event *event) { /* - * We'd expect this to only occur if the irq_work is delayed and either - * ctx->task or current has changed in the meantime. This can be the - * case on architectures that do not implement arch_irq_work_raise(). + * Both perf_pending_task() and perf_pending_irq() can race with the + * task exiting. */ - if (WARN_ON_ONCE(event->ctx->task != current)) + if (current->flags & PF_EXITING) return; /* - * Both perf_pending_task() and perf_pending_irq() can race with the - * task exiting. + * We'd expect this to only occur if the irq_work is delayed and either + * ctx->task or current has changed in the meantime. This can be the + * case on architectures that do not implement arch_irq_work_raise(). */ - if (current->flags & PF_EXITING) + if (WARN_ON_ONCE(event->ctx->task != current)) return; send_sig_perf((void __user *)event->pending_addr, @@ -7233,15 +7251,15 @@ static void __perf_pending_disable(struct perf_event *event) * CPU-A CPU-B * * perf_event_disable_inatomic() - * @pending_disable = CPU-A; + * @pending_disable = 1; * irq_work_queue(); * * sched-out - * @pending_disable = -1; + * @pending_disable = 0; * * sched-in * perf_event_disable_inatomic() - * @pending_disable = CPU-B; + * @pending_disable = 1; * irq_work_queue(); // FAILS * * irq_work_run() @@ -7439,6 +7457,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size, if (!regs) return 0; + /* No mm, no stack, no dump. 
*/ + if (!current->mm) + return 0; + /* * Check if we fit in with the requested stack size into the: * - TASK_SIZE @@ -8150,6 +8172,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + if (!current->mm) + user = false; + if (!kernel && !user) return &__empty_callchain; @@ -11091,7 +11116,7 @@ static int perf_uprobe_event_init(struct perf_event *event) if (event->attr.type != perf_uprobe.type) return -ENOENT; - if (!perfmon_capable()) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; /* @@ -11749,7 +11774,12 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (is_sampling_event(event)) { + /* + * The throttle can be triggered in the hrtimer handler. + * The HRTIMER_NORESTART should be used to stop the timer, + * rather than hrtimer_cancel(). See perf_swevent_hrtimer() + */ + if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); @@ -11804,7 +11834,8 @@ static void cpu_clock_event_start(struct perf_event *event, int flags) static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - cpu_clock_event_update(event); + if (flags & PERF_EF_UPDATE) + cpu_clock_event_update(event); } static int cpu_clock_event_add(struct perf_event *event, int flags) @@ -11882,7 +11913,8 @@ static void task_clock_event_start(struct perf_event *event, int flags) static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - task_clock_event_update(event, event->ctx->time); + if (flags & PERF_EF_UPDATE) + task_clock_event_update(event, event->ctx->time); } static int task_clock_event_add(struct perf_event *event, int flags) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 
d2aef87c7e9f..aa9a759e824f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -441,7 +441,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, * store that will be enabled on successful return */ if (!handle->size) { /* A, matches D */ - event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); WRITE_ONCE(rb->aux_nest, 0); goto err_put; @@ -526,7 +526,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) if (wakeup) { if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) - handle->event->pending_disable = smp_processor_id(); + perf_event_disable_inatomic(handle->event); perf_output_wakeup(handle); } diff --git a/kernel/exit.c b/kernel/exit.c index bd743900354c..bb184a67ac73 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -940,6 +940,15 @@ void __noreturn do_exit(long code) taskstats_exit(tsk, group_dead); trace_sched_process_exit(tsk, group_dead); + /* + * Since sampling can touch ->mm, make sure to stop everything before we + * tear it down. + * + * Also flushes inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_event_exit_task(tsk); + exit_mm(); if (group_dead) @@ -955,14 +964,6 @@ void __noreturn do_exit(long code) exit_task_work(tsk); exit_thread(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. 
- * - * because of cgroup mode, must be called before cgroup_exit() - */ - perf_event_exit_task(tsk); - sched_autogroup_exit_task(tsk); cgroup_exit(tsk); diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 565f9717c6ca..90d53fb0ee9e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -583,8 +583,8 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, if (futex_get_value(&node, naddr)) return -EFAULT; - if (node != FUTEX_NO_NODE && - (node >= MAX_NUMNODES || !node_possible(node))) + if ((node != FUTEX_NO_NODE) && + ((unsigned int)node >= MAX_NUMNODES || !node_possible(node))) return -EINVAL; } @@ -1629,6 +1629,16 @@ again: mm->futex_phash_new = NULL; if (fph) { + if (cur && (!cur->hash_mask || cur->immutable)) { + /* + * If two threads simultaneously request the global + * hash then the first one performs the switch, + * the second one returns here. + */ + free = fph; + mm->futex_phash_new = new; + return -EBUSY; + } if (cur && !new) { /* * If we have an existing hash, but do not yet have diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b0e0a7332993..2b274007e8ba 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -205,6 +205,14 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, void irq_startup_managed(struct irq_desc *desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + + /* + * Clear managed-shutdown flag, so we don't repeat managed-startup for + * multiple hotplugs, and cause imbalanced disable depth. 
+ */ + irqd_clr_managed_shutdown(d); + /* * Only start it up when the disable depth is 1, so that a disable, * hotunplug, hotplug sequence does not end up enabling it during diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index f07529ae4895..755346ea9819 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -210,13 +210,6 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - /* - * Don't restore suspended interrupts here when a system comes back - * from S3. They are reenabled via resume_device_irqs(). - */ - if (desc->istate & IRQS_SUSPENDED) - return; - if (irqd_is_managed_and_shutdown(data)) irq_startup_managed(desc); diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 1a3d483548e2..ae4c9cbd1b4b 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -202,7 +202,7 @@ struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, void *data) { struct irq_sim_work_ctx *work_ctx __free(kfree) = - kmalloc(sizeof(*work_ctx), GFP_KERNEL); + kzalloc(sizeof(*work_ctx), GFP_KERNEL); if (!work_ctx) return ERR_PTR(-ENOMEM); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 9c59fa480b0b..3a9a9f240dbc 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1136,6 +1136,7 @@ int kernel_kexec(void) Resume_devices: dpm_resume_end(PMSG_RESTORE); Resume_console: + pm_restore_gfp_mask(); console_resume_all(); thaw_processes(); Restore_console: diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 69b953551677..5a21dbe17950 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -164,11 +164,21 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, } /* almost as free_reserved_page(), just don't free the page */ -static void kho_restore_page(struct page *page) +static void kho_restore_page(struct page *page, unsigned int order) { - 
ClearPageReserved(page); - init_page_count(page); - adjust_managed_page_count(page, 1); + unsigned int nr_pages = (1 << order); + + /* Head page gets refcount of 1. */ + set_page_count(page, 1); + + /* For higher order folios, tail pages get a page count of zero. */ + for (unsigned int i = 1; i < nr_pages; i++) + set_page_count(page + i, 0); + + if (order > 0) + prep_compound_page(page, order); + + adjust_managed_page_count(page, nr_pages); } /** @@ -186,15 +196,10 @@ struct folio *kho_restore_folio(phys_addr_t phys) return NULL; order = page->private; - if (order) { - if (order > MAX_PAGE_ORDER) - return NULL; - - prep_compound_page(page, order); - } else { - kho_restore_page(page); - } + if (order > MAX_PAGE_ORDER) + return NULL; + kho_restore_page(page, order); return page_folio(page); } EXPORT_SYMBOL_GPL(kho_restore_folio); diff --git a/kernel/module/main.c b/kernel/module/main.c index 413ac6ea3702..c2c08007029d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1573,8 +1573,14 @@ static int apply_relocations(struct module *mod, const struct load_info *info) if (infosec >= info->hdr->e_shnum) continue; - /* Don't bother with non-allocated sections */ - if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC)) + /* + * Don't bother with non-allocated sections. + * An exception is the percpu section, which has separate allocations + * for individual CPUs. We relocate the percpu section in the initial + * ELF template and subsequently copy it to the per-CPU destinations. 
+ */ + if (!(info->sechdrs[infosec].sh_flags & SHF_ALLOC) && + (!infosec || infosec != info->index.pcpu)) continue; if (info->sechdrs[i].sh_flags & SHF_RELA_LIVEPATCH) @@ -2696,9 +2702,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { - int i; - enum mod_mem_type t = 0; - int ret = -ENOMEM; + int i, ret; + enum mod_mem_type t = MOD_MEM_NUM_TYPES; bool codetag_section_found = false; for_each_mod_mem_type(type) { @@ -2776,7 +2781,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_err: module_memory_restore_rox(mod); - for (t--; t >= 0; t--) + while (t--) module_memory_free(mod, t); if (codetag_section_found) codetag_free_module_sections(mod); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..9216e3b91d3b 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode) } console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend(PMSG_FREEZE); @@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); console_suspend_all(); - pm_restrict_gfp_mask(); error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); @@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode) BUG_ON(!error); } dpm_resume_end(PMSG_RECOVER); - pm_restore_gfp_mask(); console_resume_all(); pm_restore_console(); return error; diff --git a/kernel/power/power.h b/kernel/power/power.h index cb1d71562002..7ccd709af93f 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {} /* kernel/power/main.c */ extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down); extern int pm_notifier_call_chain(unsigned long val); -void pm_restrict_gfp_mask(void); -void pm_restore_gfp_mask(void); 
-#else -static inline void pm_restrict_gfp_mask(void) {} -static inline void pm_restore_gfp_mask(void) {} #endif #ifdef CONFIG_HIGHMEM diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 76b141b9aac0..bb608b68fb30 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state) return error; Recover_platform: + pm_restore_gfp_mask(); platform_recover(state); goto Resume_devices; } @@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state) trace_suspend_resume(TPS("suspend_enter"), state, false); pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]); - pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); - pm_restore_gfp_mask(); Finish: events_check_enabled = false; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e8a4b720d7d2..14d4499c6fc3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3072,6 +3072,10 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) /* Misaligned rcu_head! */ WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); + /* Avoid NULL dereference if callback is NULL. */ + if (WARN_ON_ONCE(!func)) + return; + if (debug_rcu_head_queue(head)) { /* * Probable double call_rcu(), so leak the callback. 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dce50fa57471..81c6df746df1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { - __schedstat_inc(p->stats.numa_task_swapped); - count_vm_numa_event(NUMA_TASK_SWAP); - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); - if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -3943,6 +3939,11 @@ static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) if (!scx_allow_ttwu_queue(p)) return false; +#ifdef CONFIG_SMP + if (p->sched_class == &stop_sched_class) + return false; +#endif + /* * Do not complicate things with the async wake_list while the CPU is * in hotplug state. @@ -7663,7 +7664,7 @@ const char *preempt_model_str(void) if (IS_ENABLED(CONFIG_PREEMPT_DYNAMIC)) { seq_buf_printf(&s, "(%s)%s", - preempt_dynamic_mode > 0 ? + preempt_dynamic_mode >= 0 ? preempt_modes[preempt_dynamic_mode] : "undef", brace ? 
"}" : ""); return seq_buf_str(&s); @@ -7934,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - __schedstat_inc(p->stats.numa_task_migrated); - count_vm_numa_event(NUMA_TASK_MIGRATE); - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); + /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } @@ -8545,7 +8545,7 @@ void __init sched_init(void) init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL); #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_EXT_GROUP_SCHED - root_task_group.scx_weight = CGROUP_WEIGHT_DFL; + scx_tg_init(&root_task_group); #endif /* CONFIG_EXT_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED root_task_group.rt_se = (struct sched_rt_entity **)ptr; @@ -8985,7 +8985,7 @@ struct task_group *sched_create_group(struct task_group *parent) if (!alloc_rt_sched_group(tg, parent)) goto err; - scx_group_set_weight(tg, CGROUP_WEIGHT_DFL); + scx_tg_init(tg); alloc_uclamp_sched_group(tg, parent); return tg; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ad45a8fea245..89019a140826 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1504,7 +1504,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 if (dl_entity_is_special(dl_se)) return; - scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); + scaled_delta_exec = delta_exec; + if (!dl_server(dl_se)) + scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec); dl_se->runtime -= scaled_delta_exec; @@ -1611,7 +1613,7 @@ throttle: */ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) { - s64 delta_exec, scaled_delta_exec; + s64 delta_exec; if (!rq->fair_server.dl_defer) return; @@ -1624,9 +1626,7 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p) if (delta_exec < 0) return; - scaled_delta_exec = 
dl_scaled_delta_exec(rq, &rq->fair_server, delta_exec); - - rq->fair_server.runtime -= scaled_delta_exec; + rq->fair_server.runtime -= delta_exec; if (rq->fair_server.runtime < 0) { rq->fair_server.dl_defer_running = 0; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9d71baf08075..557246880a7e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); -#ifdef CONFIG_NUMA_BALANCING - P_SCHEDSTAT(numa_task_migrated); - P_SCHEDSTAT(numa_task_swapped); -#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2c41c78be61e..b498d867ba21 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4092,6 +4092,11 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; +void scx_tg_init(struct task_group *tg) +{ + tg->scx_weight = CGROUP_WEIGHT_DFL; +} + int scx_tg_online(struct task_group *tg) { struct scx_sched *sch = scx_root; @@ -4241,12 +4246,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) percpu_down_read(&scx_cgroup_rwsem); - if (scx_cgroup_enabled && tg->scx_weight != weight) { - if (SCX_HAS_OP(sch, cgroup_set_weight)) - SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, - tg_cgrp(tg), weight); - tg->scx_weight = weight; - } + if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) && + tg->scx_weight != weight) + SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, + tg_cgrp(tg), weight); + + tg->scx_weight = weight; percpu_up_read(&scx_cgroup_rwsem); } diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 6e5072f57771..a75835c23f15 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -79,6 +79,7 @@ static inline void 
scx_update_idle(struct rq *rq, bool idle, bool do_notify) {} #ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_EXT_GROUP_SCHED +void scx_tg_init(struct task_group *tg); int scx_tg_online(struct task_group *tg); void scx_tg_offline(struct task_group *tg); int scx_cgroup_can_attach(struct cgroup_taskset *tset); @@ -88,6 +89,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); void scx_group_set_idle(struct task_group *tg, bool idle); #else /* CONFIG_EXT_GROUP_SCHED */ +static inline void scx_tg_init(struct task_group *tg) {} static inline int scx_tg_online(struct task_group *tg) { return 0; } static inline void scx_tg_offline(struct task_group *tg) {} static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 5d2d0562115b..3fe6b0c99f3d 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -82,18 +82,15 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work, - struct wake_q_head *wakeq) + struct cpu_stop_work *work) { list_add_tail(&work->list, &stopper->works); - wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. 
if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; @@ -101,12 +98,13 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) raw_spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work, &wakeq); + __cpu_stop_queue_work(stopper, work); else if (work->done) cpu_stop_signal_done(work->done); raw_spin_unlock_irqrestore(&stopper->lock, flags); - wake_up_q(&wakeq); + if (enabled) + wake_up_process(stopper->thread); preempt_enable(); return enabled; @@ -264,7 +262,6 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); - DEFINE_WAKE_Q(wakeq); int err; retry: @@ -300,8 +297,8 @@ retry: } err = 0; - __cpu_stop_queue_work(stopper1, work1, &wakeq); - __cpu_stop_queue_work(stopper2, work2, &wakeq); + __cpu_stop_queue_work(stopper1, work1); + __cpu_stop_queue_work(stopper2, work2); unlock: raw_spin_unlock(&stopper2->lock); @@ -316,7 +313,10 @@ unlock: goto retry; } - wake_up_q(&wakeq); + if (!err) { + wake_up_process(stopper1->thread); + wake_up_process(stopper2->thread); + } preempt_enable(); return err; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 08141f105c95..3885aadc434d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1436,13 +1436,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) - goto free_now; - - item->filter = filter; - list_add_tail(&item->list, &head->list); - list_for_each_entry(file, &tr->events, list) { if (file->system != dir) 
continue; @@ -1454,6 +1447,13 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, event_clear_filter(file); } + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + goto free_now; + + item->filter = filter; + list_add_tail(&item->list, &head->list); + delay_free_filter(head); return; free_now: diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9234e2c39abf..14d74a7491b8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -455,10 +455,16 @@ static int graph_trace_init(struct trace_array *tr) return 0; } +static struct tracer graph_trace; + static int ftrace_graph_trace_args(struct trace_array *tr, int set) { trace_func_graph_ent_t entry; + /* Do nothing if the current tracer is not this tracer */ + if (tr->current_trace != &graph_trace) + return 0; + if (set) entry = trace_graph_entry_args; else diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bd195d4db685..992cb0467c21 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7767,7 +7767,8 @@ void __init workqueue_init_early(void) restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask); - + cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask, + housekeeping_cpumask(HK_TYPE_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs(); diff --git a/lib/Kconfig b/lib/Kconfig index 6c1b8f184267..37db228f70a9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -716,6 +716,7 @@ config GENERIC_LIB_DEVMEM_IS_ALLOWED config PLDMFW bool + select CRC32 default n config ASN1_ENCODER diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index d48b80f3f007..0142bc916f73 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -10,6 +10,7 @@ #include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <linux/kmemleak.h> #define 
ALLOCINFO_FILE_NAME "allocinfo" #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) @@ -134,6 +135,9 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl struct codetag_bytes n; unsigned int i, nr = 0; + if (IS_ERR_OR_NULL(alloc_tag_cttype)) + return 0; + if (can_sleep) codetag_lock_module_list(alloc_tag_cttype, true); else if (!codetag_trylock_module_list(alloc_tag_cttype)) @@ -632,8 +636,13 @@ static int load_module(struct module *mod, struct codetag *start, struct codetag mod->name); return -ENOMEM; } - } + /* + * Avoid a kmemleak false positive. The pointer to the counters is stored + * in the alloc_tag section of the module and cannot be directly accessed. + */ + kmemleak_ignore_percpu(tag->counters); + } return 0; } diff --git a/lib/closure.c b/lib/closure.c index 2bfe7d2a0048..4fb78d18ee1b 100644 --- a/lib/closure.c +++ b/lib/closure.c @@ -13,23 +13,25 @@ #include <linux/seq_file.h> #include <linux/sched/debug.h> -static inline void closure_put_after_sub_checks(int flags) +static inline void closure_put_after_sub_checks(struct closure *cl, int flags) { int r = flags & CLOSURE_REMAINING_MASK; if (WARN(flags & CLOSURE_GUARD_MASK, - "closure has guard bits set: %x (%u)", + "closure %ps has guard bits set: %x (%u)", + cl->fn, flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r))) r &= ~CLOSURE_GUARD_MASK; WARN(!r && (flags & ~CLOSURE_DESTRUCTOR), - "closure ref hit 0 with incorrect flags set: %x (%u)", + "closure %ps ref hit 0 with incorrect flags set: %x (%u)", + cl->fn, flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags)); } static inline void closure_put_after_sub(struct closure *cl, int flags) { - closure_put_after_sub_checks(flags); + closure_put_after_sub_checks(cl, flags); if (!(flags & CLOSURE_REMAINING_MASK)) { smp_acquire__after_ctrl_dep(); @@ -167,7 +169,7 @@ void __sched closure_return_sync(struct closure *cl) unsigned flags = atomic_sub_return_release(1 + CLOSURE_RUNNING - CLOSURE_DESTRUCTOR, 
&cl->remaining); - closure_put_after_sub_checks(flags); + closure_put_after_sub_checks(cl, flags); if (unlikely(flags & CLOSURE_REMAINING_MASK)) { while (1) { diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3e79283b617d..b0c0f8aea269 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -35,6 +35,10 @@ obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o libcurve25519-generic-y := curve25519-fiat32.o libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o libcurve25519-generic-y += curve25519-generic.o +# clang versions prior to 18 may blow out the stack with KASAN +ifeq ($(call clang-min-version, 180000),) +KASAN_SANITIZE_curve25519-hacl64.o := n +endif obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o libcurve25519-y += curve25519.o @@ -62,7 +66,7 @@ libsha256-generic-y := sha256-generic.o obj-$(CONFIG_MPILIB) += mpi/ -obj-$(CONFIG_CRYPTO_SELFTESTS) += simd.o +obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o diff --git a/lib/crypto/aescfb.c b/lib/crypto/aescfb.c index 437613265e14..2f09ae92ffa0 100644 --- a/lib/crypto/aescfb.c +++ b/lib/crypto/aescfb.c @@ -106,11 +106,11 @@ MODULE_LICENSE("GPL"); */ static struct { - u8 ptext[64]; - u8 ctext[64]; + u8 ptext[64] __nonstring; + u8 ctext[64] __nonstring; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 iv[AES_BLOCK_SIZE] __nonstring; int klen; int len; diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c index 277824d6b4af..faa4dee9bb1b 100644 --- a/lib/crypto/aesgcm.c +++ b/lib/crypto/aesgcm.c @@ -205,19 +205,19 @@ MODULE_LICENSE("GPL"); * Test code below. 
Vectors taken from crypto/testmgr.h */ -static const u8 __initconst ctext0[16] = +static const u8 __initconst ctext0[16] __nonstring = "\x58\xe2\xfc\xce\xfa\x7e\x30\x61" "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a"; static const u8 __initconst ptext1[16]; -static const u8 __initconst ctext1[32] = +static const u8 __initconst ctext1[32] __nonstring = "\x03\x88\xda\xce\x60\xb6\xa3\x92" "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78" "\xab\x6e\x47\xd4\x2c\xec\x13\xbd" "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf"; -static const u8 __initconst ptext2[64] = +static const u8 __initconst ptext2[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -227,7 +227,7 @@ static const u8 __initconst ptext2[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext2[80] = +static const u8 __initconst ctext2[80] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -239,7 +239,7 @@ static const u8 __initconst ctext2[80] = "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6" "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4"; -static const u8 __initconst ptext3[60] = +static const u8 __initconst ptext3[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -249,7 +249,7 @@ static const u8 __initconst ptext3[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext3[76] = +static const u8 __initconst ctext3[76] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -261,17 +261,17 @@ static const u8 __initconst ctext3[76] = "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb" "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47"; -static const u8 __initconst ctext4[16] = +static const u8 __initconst ctext4[16] __nonstring = "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b" "\xa0\x0e\xd1\xf3\x12\x57\x24\x35"; -static const u8 __initconst 
ctext5[32] = +static const u8 __initconst ctext5[32] __nonstring = "\x98\xe7\x24\x7c\x07\xf0\xfe\x41" "\x1c\x26\x7e\x43\x84\xb0\xf6\x00" "\x2f\xf5\x8d\x80\x03\x39\x27\xab" "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb"; -static const u8 __initconst ptext6[64] = +static const u8 __initconst ptext6[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -281,7 +281,7 @@ static const u8 __initconst ptext6[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext6[80] = +static const u8 __initconst ctext6[80] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -293,17 +293,17 @@ static const u8 __initconst ctext6[80] = "\x99\x24\xa7\xc8\x58\x73\x36\xbf" "\xb1\x18\x02\x4d\xb8\x67\x4a\x14"; -static const u8 __initconst ctext7[16] = +static const u8 __initconst ctext7[16] __nonstring = "\x53\x0f\x8a\xfb\xc7\x45\x36\xb9" "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b"; -static const u8 __initconst ctext8[32] = +static const u8 __initconst ctext8[32] __nonstring = "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e" "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18" "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0" "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19"; -static const u8 __initconst ptext9[64] = +static const u8 __initconst ptext9[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -313,7 +313,7 @@ static const u8 __initconst ptext9[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext9[80] = +static const u8 __initconst ctext9[80] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -325,7 +325,7 @@ static const u8 __initconst ctext9[80] = "\xb0\x94\xda\xc5\xd9\x34\x71\xbd" "\xec\x1a\x50\x22\x70\xe3\xcc\x6c"; -static const u8 __initconst ptext10[60] = +static 
const u8 __initconst ptext10[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -335,7 +335,7 @@ static const u8 __initconst ptext10[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext10[76] = +static const u8 __initconst ctext10[76] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -347,7 +347,7 @@ static const u8 __initconst ctext10[76] = "\x76\xfc\x6e\xce\x0f\x4e\x17\x68" "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b"; -static const u8 __initconst ptext11[60] = +static const u8 __initconst ptext11[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -357,7 +357,7 @@ static const u8 __initconst ptext11[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext11[76] = +static const u8 __initconst ctext11[76] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -369,7 +369,7 @@ static const u8 __initconst ctext11[76] = "\x25\x19\x49\x8e\x80\xf1\x47\x8f" "\x37\xba\x55\xbd\x6d\x27\x61\x8c"; -static const u8 __initconst ptext12[719] = +static const u8 __initconst ptext12[719] __nonstring = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" @@ -461,7 +461,7 @@ static const u8 __initconst ptext12[719] = "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" "\xa4\x78\xdb\x74\x3d\x8b\xb5"; -static const u8 __initconst ctext12[735] = +static const u8 __initconst ctext12[735] __nonstring = "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20" "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0" "\xa2\xb4\x37\x19\x11\x58\xb6\x0b" @@ -559,9 +559,9 @@ static struct { const u8 *ptext; const u8 *ctext; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[GCM_AES_IV_SIZE]; - u8 assoc[20]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 
iv[GCM_AES_IV_SIZE] __nonstring; + u8 assoc[20] __nonstring; int klen; int clen; diff --git a/lib/group_cpus.c b/lib/group_cpus.c index ee272c4cefcc..18d43a406114 100644 --- a/lib/group_cpus.c +++ b/lib/group_cpus.c @@ -352,6 +352,9 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) int ret = -ENOMEM; struct cpumask *masks = NULL; + if (numgrps == 0) + return NULL; + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; @@ -426,8 +429,12 @@ struct cpumask *group_cpus_evenly(unsigned int numgrps) #else /* CONFIG_SMP */ struct cpumask *group_cpus_evenly(unsigned int numgrps) { - struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + struct cpumask *masks; + if (numgrps == 0) + return NULL; + + masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index affe979bd14d..ef66be963798 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5319,6 +5319,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt, struct maple_enode *start; if (mte_is_leaf(enode)) { + mte_set_node_dead(enode); node->type = mte_node_type(enode); goto free_leaf; } @@ -5527,8 +5528,9 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas->store_type = mas_wr_store_type(&wr_mas); request = mas_prealloc_calc(&wr_mas, entry); if (!request) - return ret; + goto set_flag; + mas->mas_flags &= ~MA_STATE_PREALLOC; mas_node_count_gfp(mas, request, gfp); if (mas_is_err(mas)) { mas_set_alloc_req(mas, 0); @@ -5538,6 +5540,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) return ret; } +set_flag: mas->mas_flags |= MA_STATE_PREALLOC; return ret; } diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index f0887344b274..7d82efa5b14f 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -26,9 +26,9 @@ static int rvv_has_vector(void) static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned 
long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -36,8 +36,9 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -99,7 +100,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -108,8 +109,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ @@ -195,9 +197,9 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -205,8 +207,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -287,7 +290,7 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -296,8 +299,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + 
"vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -413,9 +417,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -423,8 +427,9 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -539,7 +544,7 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* P/Q right side optimization */ @@ -548,8 +553,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -721,9 +727,9 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) { u8 **dptr = (u8 **)ptrs; - unsigned long d; - int z, z0; u8 *p, *q; + unsigned long vl, d; + int z, z0; z0 = disks - 3; /* Highest data disk */ p = dptr[z0 + 1]; /* XOR parity */ @@ -731,8 +737,9 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* @@ -915,7 +922,7 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, { u8 **dptr = (u8 **)ptrs; u8 *p, *q; - unsigned long d; + unsigned long vl, d; int z, z0; z0 = stop; /* 
P/Q right side optimization */ @@ -924,8 +931,9 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" - "vsetvli t0, x0, e8, m1, ta, ma\n" + "vsetvli %0, x0, e8, m1, ta, ma\n" ".option pop\n" + : "=&r" (vl) ); /* diff --git a/lib/test_objagg.c b/lib/test_objagg.c index d34df4306b87..222b39fc2629 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -899,8 +899,10 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, int err; stats = objagg_hints_stats_get(objagg_hints); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_hints_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; diff --git a/mm/damon/core.c b/mm/damon/core.c index b217e0120e09..979b29e16ef4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1449,6 +1449,7 @@ static unsigned long damon_get_intervals_score(struct damon_ctx *c) } } target_access_events = max_access_events * goal_bp / 10000; + target_access_events = target_access_events ? : 1; return access_events * 10000 / target_access_events; } @@ -2355,9 +2356,8 @@ static void kdamond_usleep(unsigned long usecs) * * If there is a &struct damon_call_control request that registered via * &damon_call() on @ctx, do or cancel the invocation of the function depending - * on @cancel. @cancel is set when the kdamond is deactivated by DAMOS - * watermarks, or the kdamond is already out of the main loop and therefore - * will be terminated. + * on @cancel. @cancel is set when the kdamond is already out of the main loop + * and therefore will be terminated. 
*/ static void kdamond_call(struct damon_ctx *ctx, bool cancel) { @@ -2405,7 +2405,7 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) if (ctx->callback.after_wmarks_check && ctx->callback.after_wmarks_check(ctx)) break; - kdamond_call(ctx, true); + kdamond_call(ctx, false); damos_walk_cancel(ctx); } return -EBUSY; diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 0f6c9e1fec0b..30ae7518ffbf 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -472,6 +472,7 @@ static ssize_t memcg_path_store(struct kobject *kobj, return -ENOMEM; strscpy(path, buf, count + 1); + kfree(filter->memcg_path); filter->memcg_path = path; return count; } diff --git a/mm/execmem.c b/mm/execmem.c index 9720ac2dfa41..2b683e7d864d 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -254,34 +254,6 @@ out_unlock: return ptr; } -static bool execmem_cache_rox = false; - -void execmem_cache_make_ro(void) -{ - struct maple_tree *free_areas = &execmem_cache.free_areas; - struct maple_tree *busy_areas = &execmem_cache.busy_areas; - MA_STATE(mas_free, free_areas, 0, ULONG_MAX); - MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); - struct mutex *mutex = &execmem_cache.mutex; - void *area; - - execmem_cache_rox = true; - - mutex_lock(mutex); - - mas_for_each(&mas_free, area, ULONG_MAX) { - unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; - set_memory_ro(mas_free.index, pages); - } - - mas_for_each(&mas_busy, area, ULONG_MAX) { - unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; - set_memory_ro(mas_busy.index, pages); - } - - mutex_unlock(mutex); -} - static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; @@ -302,15 +274,9 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - if (execmem_cache_rox) { - err = 
set_memory_rox((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; - } else { - err = set_memory_x((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; - } + err = set_memory_rox((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; err = execmem_cache_add(p, alloc_size); if (err) @@ -2303,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs) /* * Returns the number of collected folios. Return value is always >= 0. */ -static void collect_longterm_unpinnable_folios( +static unsigned long collect_longterm_unpinnable_folios( struct list_head *movable_folio_list, struct pages_or_folios *pofs) { + unsigned long i, collected = 0; struct folio *prev_folio = NULL; bool drain_allow = true; - unsigned long i; for (i = 0; i < pofs->nr_entries; i++) { struct folio *folio = pofs_get_folio(pofs, i); @@ -2321,6 +2321,8 @@ static void collect_longterm_unpinnable_folios( if (folio_is_longterm_pinnable(folio)) continue; + collected++; + if (folio_is_device_coherent(folio)) continue; @@ -2342,6 +2344,8 @@ static void collect_longterm_unpinnable_folios( NR_ISOLATED_ANON + folio_is_file_lru(folio), folio_nr_pages(folio)); } + + return collected; } /* @@ -2418,9 +2422,11 @@ static long check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs) { LIST_HEAD(movable_folio_list); + unsigned long collected; - collect_longterm_unpinnable_folios(&movable_folio_list, pofs); - if (list_empty(&movable_folio_list)) + collected = collect_longterm_unpinnable_folios(&movable_folio_list, + pofs); + if (!collected) return 0; return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8746ed2fec13..a0d285d20992 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2340,12 +2340,15 @@ struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, struct folio *folio; spin_lock_irq(&hugetlb_lock); + if (!h->resv_huge_pages) { + spin_unlock_irq(&hugetlb_lock); + return NULL; + 
} + folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, nmask); - if (folio) { - VM_BUG_ON(!h->resv_huge_pages); + if (folio) h->resv_huge_pages--; - } spin_unlock_irq(&hugetlb_lock); return folio; @@ -2787,20 +2790,24 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, /* * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve * the old one - * @h: struct hstate old page belongs to * @old_folio: Old folio to dissolve * @list: List to isolate the page in case we need to * Returns 0 on success, otherwise negated error. */ -static int alloc_and_dissolve_hugetlb_folio(struct hstate *h, - struct folio *old_folio, struct list_head *list) +static int alloc_and_dissolve_hugetlb_folio(struct folio *old_folio, + struct list_head *list) { - gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + gfp_t gfp_mask; + struct hstate *h; int nid = folio_nid(old_folio); struct folio *new_folio = NULL; int ret = 0; retry: + /* + * The old_folio might have been dissolved from under our feet, so make sure + * to carefully check the state under the lock. + */ spin_lock_irq(&hugetlb_lock); if (!folio_test_hugetlb(old_folio)) { /* @@ -2829,8 +2836,10 @@ retry: cond_resched(); goto retry; } else { + h = folio_hstate(old_folio); if (!new_folio) { spin_unlock_irq(&hugetlb_lock); + gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL); if (!new_folio) @@ -2874,35 +2883,24 @@ free_new: int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { - struct hstate *h; int ret = -EBUSY; - /* - * The page might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. - * Return success when racing as if we dissolved the page ourselves. 
- */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!folio_test_hugetlb(folio)) return 0; - } - spin_unlock_irq(&hugetlb_lock); /* * Fence off gigantic pages as there is a cyclic dependency between * alloc_contig_range and them. Return -ENOMEM as this has the effect * of bailing out right away without further retrying. */ - if (hstate_is_gigantic(h)) + if (folio_order(folio) > MAX_PAGE_ORDER) return -ENOMEM; if (folio_ref_count(folio) && folio_isolate_hugetlb(folio, list)) ret = 0; else if (!folio_ref_count(folio)) - ret = alloc_and_dissolve_hugetlb_folio(h, folio, list); + ret = alloc_and_dissolve_hugetlb_folio(folio, list); return ret; } @@ -2916,7 +2914,6 @@ int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) */ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) { - struct hstate *h; struct folio *folio; int ret = 0; @@ -2925,23 +2922,9 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) while (start_pfn < end_pfn) { folio = pfn_folio(start_pfn); - /* - * The folio might have been dissolved from under our feet, so make sure - * to carefully check the state under the lock. 
- */ - spin_lock_irq(&hugetlb_lock); - if (folio_test_hugetlb(folio)) { - h = folio_hstate(folio); - } else { - spin_unlock_irq(&hugetlb_lock); - start_pfn++; - continue; - } - spin_unlock_irq(&hugetlb_lock); - - if (!folio_ref_count(folio)) { - ret = alloc_and_dissolve_hugetlb_folio(h, folio, - &isolate_list); + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (folio_test_hugetlb(folio) && !folio_ref_count(folio)) { + ret = alloc_and_dissolve_hugetlb_folio(folio, &isolate_list); if (ret) break; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8357e1a33699..b0877035491f 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -370,36 +370,6 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } -/* - * This function is invoked with report_lock (a raw_spinlock) held. A - * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping - * rt_spinlock. - * - * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a - * lockdep warning for this raw_spinlock -> spinlock dependency. This config - * option is enabled by default to ensure better test coverage to expose this - * kind of RT kernel problem. This lockdep splat, however, can be suppressed - * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the - * invalid PREEMPT_RT case has been taken care of. - */ -static inline struct vm_struct *kasan_find_vm_area(void *addr) -{ - static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); - struct vm_struct *va; - - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - return NULL; - - /* - * Suppress lockdep warning and fetch vmalloc area of the - * offending address. 
- */ - lock_map_acquire_try(&vmalloc_map); - va = find_vm_area(addr); - lock_map_release(&vmalloc_map); - return va; -} - static void print_address_description(void *addr, u8 tag, struct kasan_report_info *info) { @@ -429,19 +399,8 @@ static void print_address_description(void *addr, u8 tag, } if (is_vmalloc_addr(addr)) { - struct vm_struct *va = kasan_find_vm_area(addr); - - if (va) { - pr_err("The buggy address belongs to the virtual mapping at\n" - " [%px, %px) created by:\n" - " %pS\n", - va->addr, va->addr + va->size, va->caller); - pr_err("\n"); - - page = vmalloc_to_page(addr); - } else { - pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); - } + pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); + page = vmalloc_to_page(addr); } if (page) { diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da9cee34ee1b..8d588e685311 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1247,6 +1247,20 @@ void __ref kmemleak_transient_leak(const void *ptr) EXPORT_SYMBOL(kmemleak_transient_leak); /** + * kmemleak_ignore_percpu - similar to kmemleak_ignore but taking a percpu + * address argument + * @ptr: percpu address of the object + */ +void __ref kmemleak_ignore_percpu(const void __percpu *ptr) +{ + pr_debug("%s(0x%px)\n", __func__, ptr); + + if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) + make_black_object((unsigned long)ptr, OBJECT_PERCPU); +} +EXPORT_SYMBOL_GPL(kmemleak_ignore_percpu); + +/** * kmemleak_ignore - ignore an allocated object * @ptr: pointer to beginning of the object * diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 902da8a9c643..70fdeda1120b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif }; diff --git a/mm/memory.c b/mm/memory.c index 8eba595056fe..b0cda5aab398 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4315,26 +4315,6 
@@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) -{ - struct swap_info_struct *si = swp_swap_info(entry); - pgoff_t offset = swp_offset(entry); - int i; - - /* - * While allocating a large folio and doing swap_read_folio, which is - * the case the being faulted pte doesn't have swapcache. We need to - * ensure all PTEs have no cache as well, otherwise, we might go to - * swap devices while the content is in swapcache. - */ - for (i = 0; i < max_nr; i++) { - if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) - return i; - } - - return i; -} - /* * Check if the PTEs within a range are contiguous swap entries * and have consistent swapcache, zeromap. diff --git a/mm/migrate.c b/mm/migrate.c index 8cf0f9c9599d..2c88f3b33833 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2399,6 +2399,7 @@ set_status: static int get_compat_pages_array(const void __user *chunk_pages[], const void __user * __user *pages, + unsigned long chunk_offset, unsigned long chunk_nr) { compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages; @@ -2406,7 +2407,7 @@ static int get_compat_pages_array(const void __user *chunk_pages[], int i; for (i = 0; i < chunk_nr; i++) { - if (get_user(p, pages32 + i)) + if (get_user(p, pages32 + chunk_offset + i)) return -EFAULT; chunk_pages[i] = compat_ptr(p); } @@ -2425,27 +2426,28 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, #define DO_PAGES_STAT_CHUNK_NR 16UL const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; int chunk_status[DO_PAGES_STAT_CHUNK_NR]; + unsigned long chunk_offset = 0; while (nr_pages) { unsigned long chunk_nr = min(nr_pages, DO_PAGES_STAT_CHUNK_NR); if (in_compat_syscall()) { if (get_compat_pages_array(chunk_pages, pages, - chunk_nr)) + chunk_offset, chunk_nr)) break; } else { - if (copy_from_user(chunk_pages, pages, + if (copy_from_user(chunk_pages, pages + chunk_offset, chunk_nr * 
sizeof(*chunk_pages))) break; } do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); - if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) + if (copy_to_user(status + chunk_offset, chunk_status, + chunk_nr * sizeof(*status))) break; - pages += chunk_nr; - status += chunk_nr; + chunk_offset += chunk_nr; nr_pages -= chunk_nr; } return nr_pages ? -EFAULT : 0; diff --git a/mm/rmap.c b/mm/rmap.c index fb63d9256f09..1320b88fab74 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page, #endif } -/* We support batch unmapping of PTEs for lazyfree large folios */ -static inline bool can_batch_unmap_folio_ptes(unsigned long addr, - struct folio *folio, pte_t *ptep) +static inline unsigned int folio_unmap_pte_batch(struct folio *folio, + struct page_vma_mapped_walk *pvmw, + enum ttu_flags flags, pte_t pte) { const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; - int max_nr = folio_nr_pages(folio); - pte_t pte = ptep_get(ptep); + unsigned long end_addr, addr = pvmw->address; + struct vm_area_struct *vma = pvmw->vma; + unsigned int max_nr; + + if (flags & TTU_HWPOISON) + return 1; + if (!folio_test_large(folio)) + return 1; + /* We may only batch within a single VMA and a single page table. */ + end_addr = pmd_addr_end(addr, vma->vm_end); + max_nr = (end_addr - addr) >> PAGE_SHIFT; + + /* We only support lazyfree batching for now ... 
*/ if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) - return false; + return 1; if (pte_unused(pte)) - return false; - if (pte_pfn(pte) != folio_pfn(folio)) - return false; + return 1; - return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, - NULL, NULL) == max_nr; + return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags, + NULL, NULL, NULL); } /* @@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (pte_dirty(pteval)) folio_mark_dirty(folio); } else if (likely(pte_present(pteval))) { - if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && - can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) - nr_pages = folio_nr_pages(folio); + nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval); end_addr = address + nr_pages * PAGE_SIZE; flush_cache_range(vma, address, end_addr); @@ -2206,13 +2213,16 @@ discard: hugetlb_remove_rmap(folio); } else { folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); - folio_ref_sub(folio, nr_pages - 1); } if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); - folio_put(folio); - /* We have already batched the entire folio */ - if (nr_pages > 1) + folio_put_refs(folio, nr_pages); + + /* + * If we are sure that we batched the entire folio and cleared + * all PTEs, we can just optimize and stop right here. 
+ */ + if (nr_pages == folio_nr_pages(folio)) goto walk_done; continue; walk_abort: diff --git a/mm/secretmem.c b/mm/secretmem.c index 589b26c2d553..9a11a38a6770 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags) struct file *file; struct inode *inode; const char *anon_name = "[secretmem]"; - int err; - inode = alloc_anon_inode(secretmem_mnt->mnt_sb); + inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL); if (IS_ERR(inode)) return ERR_CAST(inode); - err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL); - if (err) { - file = ERR_PTR(err); - goto err_free_inode; - } - file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem", O_RDWR, &secretmem_fops); if (IS_ERR(file)) diff --git a/mm/shmem.c b/mm/shmem.c index 0c5fb4ffa03a..3a5a65b1f41a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2259,6 +2259,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, folio = swap_cache_get_folio(swap, NULL, 0); order = xa_get_order(&mapping->i_pages, index); if (!folio) { + int nr_pages = 1 << order; bool fallback_order0 = false; /* Or update major stats only when swapin succeeds?? */ @@ -2272,9 +2273,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, * If uffd is active for the vma, we need per-page fault * fidelity to maintain the uffd semantics, then fallback * to swapin order-0 folio, as well as for zswap case. + * Any existing sub folio in the swap cache also blocks + * mTHP swapin. */ if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) || - !zswap_never_enabled())) + !zswap_never_enabled() || + non_swapcache_batch(swap, nr_pages) != nr_pages)) fallback_order0 = true; /* Skip swapcache for synchronous device. 
*/ diff --git a/mm/shrinker.c b/mm/shrinker.c index c56c1f824f79..cbadd4d4437c 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -825,9 +825,15 @@ EXPORT_SYMBOL_GPL(shrinker_free); void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker) { - struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + struct shrink_control sc = { + .gfp_mask = GFP_KERNEL, +#ifdef CONFIG_MEMCG + .memcg = root_mem_cgroup, +#endif + }; unsigned long nr_freed = atomic_long_read(&shrinker->objects_freed); + seq_buf_puts(out, shrinker->name); seq_buf_putc(out, '\n'); @@ -867,7 +873,12 @@ void shrinkers_to_text(struct seq_buf *out) } list_for_each_entry(shrinker, &shrinker_list, list) { - struct shrink_control sc = { .gfp_mask = GFP_KERNEL, }; + struct shrink_control sc = { + .gfp_mask = GFP_KERNEL, +#ifdef CONFIG_MEMCG + .memcg = root_mem_cgroup, +#endif + }; unsigned long mem = shrinker->count_objects(shrinker, &sc); if (!mem || mem == SHRINK_STOP || mem == SHRINK_EMPTY) diff --git a/mm/swap.h b/mm/swap.h index 2269eb9df0af..9096082a915e 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -106,6 +106,25 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return find_next_bit(sis->zeromap, end, start) - start; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + struct swap_info_struct *si = swp_swap_info(entry); + pgoff_t offset = swp_offset(entry); + int i; + + /* + * While allocating a large folio and doing mTHP swapin, we need to + * ensure all entries are not cached, otherwise, the mTHP folio will + * be in conflict with the folio in swap cache. 
+ */ + for (i = 0; i < max_nr; i++) { + if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) + return i; + } + + return i; +} + #else /* CONFIG_SWAP */ struct swap_iocb; static inline void swap_read_folio(struct folio *folio, struct swap_iocb **plug) @@ -199,6 +218,10 @@ static inline int swap_zeromap_batch(swp_entry_t entry, int max_nr, return 0; } +static inline int non_swapcache_batch(swp_entry_t entry, int max_nr) +{ + return 0; +} #endif /* CONFIG_SWAP */ /** diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index bc473ad21202..8253978ee0fb 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1084,8 +1084,18 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, pte_t orig_dst_pte, pte_t orig_src_pte, pmd_t *dst_pmd, pmd_t dst_pmdval, spinlock_t *dst_ptl, spinlock_t *src_ptl, - struct folio *src_folio) + struct folio *src_folio, + struct swap_info_struct *si, swp_entry_t entry) { + /* + * Check if the folio still belongs to the target swap entry after + * acquiring the lock. Folio can be freed in the swap cache while + * not locked. + */ + if (src_folio && unlikely(!folio_test_swapcache(src_folio) || + entry.val != src_folio->swap.val)) + return -EAGAIN; + double_pt_lock(dst_ptl, src_ptl); if (!is_pte_pages_stable(dst_pte, src_pte, orig_dst_pte, orig_src_pte, @@ -1102,6 +1112,25 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, if (src_folio) { folio_move_anon_rmap(src_folio, dst_vma); src_folio->index = linear_page_index(dst_vma, dst_addr); + } else { + /* + * Check if the swap entry is cached after acquiring the src_pte + * lock. Otherwise, we might miss a newly loaded swap cache folio. + * + * Check swap_map directly to minimize overhead, READ_ONCE is sufficient. + * We are trying to catch newly added swap cache, the only possible case is + * when a folio is swapped in and out again staying in swap cache, using the + * same entry before the PTE check above. 
The PTL is acquired and released + * twice, each time after updating the swap_map's flag. So holding + * the PTL here ensures we see the updated value. False positive is possible, + * e.g. SWP_SYNCHRONOUS_IO swapin may set the flag without touching the + * cache, or during the tiny synchronization window between swap cache and + * swap_map, but it will be gone very quickly, worst result is retry jitters. + */ + if (READ_ONCE(si->swap_map[swp_offset(entry)]) & SWAP_HAS_CACHE) { + double_pt_unlock(dst_ptl, src_ptl); + return -EAGAIN; + } } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); @@ -1412,7 +1441,7 @@ retry: } err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte, orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, - dst_ptl, src_ptl, src_folio); + dst_ptl, src_ptl, src_folio, si, entry); } out: diff --git a/mm/vmalloc.c b/mm/vmalloc.c index ab986dd09b6a..6dbcdceecae1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -514,6 +514,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr, pgtbl_mod_mask *mask) { + int err = 0; pte_t *pte; /* @@ -530,12 +531,18 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, do { struct page *page = pages[*nr]; - if (WARN_ON(!pte_none(ptep_get(pte)))) - return -EBUSY; - if (WARN_ON(!page)) - return -ENOMEM; - if (WARN_ON(!pfn_valid(page_to_pfn(page)))) - return -EINVAL; + if (WARN_ON(!pte_none(ptep_get(pte)))) { + err = -EBUSY; + break; + } + if (WARN_ON(!page)) { + err = -ENOMEM; + break; + } + if (WARN_ON(!pfn_valid(page_to_pfn(page)))) { + err = -EINVAL; + break; + } set_pte_at(&init_mm, addr, pte, mk_pte(page, prot)); (*nr)++; @@ -543,7 +550,8 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, arch_leave_lazy_mmu_mode(); *mask |= PGTBL_PTE_MODIFIED; - return 0; + + return err; } static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, diff --git a/mm/vmstat.c b/mm/vmstat.c index 
429ae5339bfe..a78d70ddeacd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", - "numa_task_migrated", - "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 73ea7e67f05a..30242fe10341 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint) /* Fill in the routing entry */ rt->target = ta->sat_addr; + dev_put(rt->dev); /* Release old device */ dev_hold(devhint); rt->dev = devhint; rt->flags = r->rt_flags; diff --git a/net/atm/clip.c b/net/atm/clip.c index 61b5b700817d..f7a5565e794e 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -45,7 +45,8 @@ #include <net/atmclip.h> static struct net_device *clip_devs; -static struct atm_vcc *atmarpd; +static struct atm_vcc __rcu *atmarpd; +static DEFINE_MUTEX(atmarpd_lock); static struct timer_list idle_timer; static const struct neigh_ops clip_neigh_ops; @@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip) { struct sock *sk; struct atmarp_ctrl *ctrl; + struct atm_vcc *vcc; struct sk_buff *skb; + int err = 0; pr_debug("(%d)\n", type); - if (!atmarpd) - return -EUNATCH; + + rcu_read_lock(); + vcc = rcu_dereference(atmarpd); + if (!vcc) { + err = -EUNATCH; + goto unlock; + } skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + if (!skb) { + err = -ENOMEM; + goto unlock; + } ctrl = skb_put(skb, sizeof(struct atmarp_ctrl)); ctrl->type = type; ctrl->itf_num = itf; ctrl->ip = ip; - atm_force_charge(atmarpd, skb->truesize); + atm_force_charge(vcc, skb->truesize); - sk = sk_atm(atmarpd); + sk = sk_atm(vcc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); - return 0; +unlock: + rcu_read_unlock(); + return err; } static void link_vcc(struct clip_vcc 
*clip_vcc, struct atmarp_entry *entry) @@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("\n"); - if (!clip_devs) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - return; - } - if (!skb) { pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) @@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) return; } atm_return(vcc, skb->truesize); + if (!clip_devs) { + kfree_skb(skb); + return; + } + skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs; /* clip_vcc->entry == NULL if we don't have an IP address yet */ if (!skb->dev) { @@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) if (!vcc->push) return -EBADFD; + if (vcc->user_back) + return -EINVAL; clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; @@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc) { pr_debug("\n"); - rtnl_lock(); - atmarpd = NULL; + mutex_lock(&atmarpd_lock); + RCU_INIT_POINTER(atmarpd, NULL); + mutex_unlock(&atmarpd_lock); + + synchronize_rcu(); skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); - rtnl_unlock(); pr_debug("(done)\n"); module_put(THIS_MODULE); } +static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + return 0; +} + static const struct atmdev_ops atmarpd_dev_ops = { - .close = atmarpd_close + .close = atmarpd_close, + .send = atmarpd_send }; @@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = { static int atm_init_atmarp(struct atm_vcc *vcc) { - rtnl_lock(); + if (vcc->push == clip_push) + return -EINVAL; + + mutex_lock(&atmarpd_lock); if (atmarpd) { - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return -EADDRINUSE; } mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ); - atmarpd = vcc; + rcu_assign_pointer(atmarpd, vcc); set_bit(ATM_VF_META, &vcc->flags); set_bit(ATM_VF_READY, &vcc->flags); /* allow replies and avoid getting 
closed if signaling dies */ @@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc) vcc->push = NULL; vcc->pop = NULL; /* crash */ vcc->push_oam = NULL; /* crash */ - rtnl_unlock(); + mutex_unlock(&atmarpd_lock); return 0; } static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct atm_vcc *vcc = ATM_SD(sock); + struct sock *sk = sock->sk; int err = 0; switch (cmd) { @@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = clip_create(arg); break; case ATMARPD_CTRL: + lock_sock(sk); err = atm_init_atmarp(vcc); if (!err) { sock->state = SS_CONNECTED; __module_get(THIS_MODULE); } + release_sock(sk); break; case ATMARP_MKIP: + lock_sock(sk); err = clip_mkip(vcc, arg); + release_sock(sk); break; case ATMARP_SETENTRY: err = clip_setentry(vcc, (__force __be32)arg); diff --git a/net/atm/common.c b/net/atm/common.c index 9b75699992ff..d7f7976ea13a 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { + atm_return_tx(vcc, skb); kfree_skb(skb); error = -EFAULT; goto out; diff --git a/net/atm/lec.c b/net/atm/lec.c index acef984f3367..afb8d3eb2185 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; +static DEFINE_MUTEX(lec_mutex); #if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) @@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) int bytes_left; struct atmlec_ioc ioc_data; + lockdep_assert_held(&lec_mutex); /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); 
if (bytes_left != 0) @@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { + lockdep_assert_held(&lec_mutex); if (arg < 0 || arg >= MAX_LEC_ITF) return -EINVAL; arg = array_index_nospec(arg, MAX_LEC_ITF); @@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) int i; struct lec_priv *priv; + lockdep_assert_held(&lec_mutex); if (arg < 0) arg = 0; if (arg >= MAX_LEC_ITF) @@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); + dev_lec[i] = NULL; return -EINVAL; } @@ -904,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) v = (dev && netdev_priv(dev)) ? lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { - dev_put(dev); /* Partial state reset for the next time we get called */ dev = NULL; } @@ -928,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) { struct lec_state *state = seq->private; + mutex_lock(&lec_mutex); state->itf = 0; state->dev = NULL; state->locked = NULL; @@ -945,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v) if (state->dev) { spin_unlock_irqrestore(&state->locked->lec_arp_lock, state->flags); - dev_put(state->dev); + state->dev = NULL; } + mutex_unlock(&lec_mutex); } static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -1003,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } + mutex_lock(&lec_mutex); switch (cmd) { case ATMLEC_CTRL: err = lecd_attach(vcc, (int)arg); @@ -1017,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + mutex_unlock(&lec_mutex); return err; } diff --git a/net/atm/raw.c b/net/atm/raw.c index 2b5f78a7ec3e..1e6511ec842c 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -36,7 +36,7 @@ 
static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); - WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); + atm_return_tx(vcc, skb); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } diff --git a/net/atm/resources.c b/net/atm/resources.c index 995d29e7fb13..b19d851e1f44 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev) */ mutex_lock(&atm_dev_mutex); list_del(&dev->dev_list); - mutex_unlock(&atm_dev_mutex); - atm_dev_release_vccs(dev); atm_unregister_sysfs(dev); atm_proc_dev_deregister(dev); + mutex_unlock(&atm_dev_mutex); atm_dev_put(dev); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 07a8b4281a39..14d7221b8ac0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida); /* Get HCI device by index. * Device is held on return. 
*/ -struct hci_dev *hci_dev_get(int index) +static struct hci_dev *__hci_dev_get(int index, int *srcu_index) { struct hci_dev *hdev = NULL, *d; @@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index) list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); + if (srcu_index) + *srcu_index = srcu_read_lock(&d->srcu); break; } } @@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index) return hdev; } +struct hci_dev *hci_dev_get(int index) +{ + return __hci_dev_get(index, NULL); +} + +static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index) +{ + return __hci_dev_get(index, srcu_index); +} + +static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index) +{ + srcu_read_unlock(&hdev->srcu, srcu_index); + hci_dev_put(hdev); +} + /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) @@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; - int err; + int err, srcu_index; - hdev = hci_dev_get(dev); + hdev = hci_dev_get_srcu(dev, &srcu_index); if (!hdev) return -ENODEV; @@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev) err = hci_dev_do_reset(hdev); done: - hci_dev_put(hdev); + hci_dev_put_srcu(hdev, srcu_index); return err; } @@ -2433,6 +2451,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) if (!hdev) return NULL; + if (init_srcu_struct(&hdev->srcu)) { + kfree(hdev); + return NULL; + } + hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); @@ -2678,6 +2701,9 @@ void hci_unregister_dev(struct hci_dev *hdev) list_del(&hdev->list); write_unlock(&hci_dev_list_lock); + synchronize_srcu(&hdev->srcu); + cleanup_srcu_struct(&hdev->srcu); + disable_work_sync(&hdev->rx_work); disable_work_sync(&hdev->cmd_work); disable_work_sync(&hdev->tx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 66052d6aaa1d..992131f88a45 100644 --- 
a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data, return rp->status; } -static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, - struct sk_buff *skb) -{ - struct hci_rp_le_set_ext_adv_params *rp = data; - struct hci_cp_le_set_ext_adv_params *cp; - struct adv_info *adv_instance; - - bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); - - if (rp->status) - return rp->status; - - cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS); - if (!cp) - return rp->status; - - hci_dev_lock(hdev); - hdev->adv_addr_type = cp->own_addr_type; - if (!cp->handle) { - /* Store in hdev for instance 0 */ - hdev->adv_tx_power = rp->tx_power; - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - adv_instance->tx_power = rp->tx_power; - } - /* Update adv data as tx power is known now */ - hci_update_adv_data(hdev, cp->handle); - - hci_dev_unlock(hdev); - - return rp->status; -} - static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -4164,8 +4130,6 @@ static const struct hci_cc { HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS, hci_cc_le_read_num_adv_sets, sizeof(struct hci_rp_le_read_num_supported_adv_sets)), - HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param, - sizeof(struct hci_rp_le_set_ext_adv_params)), HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE, hci_cc_le_set_ext_adv_enable), HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR, @@ -7002,7 +6966,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu); if (!ev->status) { + bis->state = BT_CONNECTED; set_bit(HCI_CONN_BIG_SYNC, &bis->flags); + hci_debugfs_create_conn(bis); + hci_conn_add_sysfs(bis); hci_iso_setup_path(bis); } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 6687f2a4d1eb..5f178db8d40d 100644 --- a/net/bluetooth/hci_sync.c +++ 
b/net/bluetooth/hci_sync.c @@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } +static int +hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv, + const struct hci_cp_le_set_ext_adv_params *cp, + struct hci_rp_le_set_ext_adv_params *rp) +{ + struct sk_buff *skb; + + skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp), + cp, HCI_CMD_TIMEOUT); + + /* If command return a status event, skb will be set to -ENODATA */ + if (skb == ERR_PTR(-ENODATA)) + return 0; + + if (IS_ERR(skb)) { + bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld", + HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb)); + return PTR_ERR(skb); + } + + if (skb->len != sizeof(*rp)) { + bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u", + HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len); + kfree_skb(skb); + return -EIO; + } + + memcpy(rp, skb->data, sizeof(*rp)); + kfree_skb(skb); + + if (!rp->status) { + hdev->adv_addr_type = cp->own_addr_type; + if (!cp->handle) { + /* Store in hdev for instance 0 */ + hdev->adv_tx_power = rp->tx_power; + } else if (adv) { + adv->tx_power = rp->tx_power; + } + } + + return rp->status; +} + +static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, + HCI_MAX_EXT_AD_LENGTH); + u8 len; + struct adv_info *adv = NULL; + int err; + + if (instance) { + adv = hci_find_adv_instance(hdev, instance); + if (!adv || !adv->adv_data_changed) + return 0; + } + + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); + + pdu->length = len; + pdu->handle = adv ? 
adv->handle : instance; + pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; + pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; + + err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, + struct_size(pdu, data, len), pdu, + HCI_CMD_TIMEOUT); + if (err) + return err; + + /* Update data if the command succeed */ + if (adv) { + adv->adv_data_changed = false; + } else { + memcpy(hdev->adv_data, pdu->data, len); + hdev->adv_data_len = len; + } + + return 0; +} + +static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + struct hci_cp_le_set_adv_data cp; + u8 len; + + memset(&cp, 0, sizeof(cp)); + + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return 0; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} + +int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) +{ + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return 0; + + if (ext_adv_capable(hdev)) + return hci_set_ext_adv_data_sync(hdev, instance); + + return hci_set_adv_data_sync(hdev, instance); +} + int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; bool connectable; u32 flags; bdaddr_t random_addr; @@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) * Command Disallowed error, so we must first disable the * instance if it is active. 
*/ - if (adv && !adv->pending) { + if (adv) { err = hci_disable_ext_adv_instance_sync(hdev, instance); if (err) return err; @@ -1316,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) cp.secondary_phy = HCI_ADV_PHY_1M; } - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; @@ -1822,79 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length, - HCI_MAX_EXT_AD_LENGTH); - u8 len; - struct adv_info *adv = NULL; - int err; - - if (instance) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->adv_data_changed) - return 0; - } - - len = eir_create_adv_data(hdev, instance, pdu->data, - HCI_MAX_EXT_AD_LENGTH); - - pdu->length = len; - pdu->handle = adv ? 
adv->handle : instance; - pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA, - struct_size(pdu, data, len), pdu, - HCI_CMD_TIMEOUT); - if (err) - return err; - - /* Update data if the command succeed */ - if (adv) { - adv->adv_data_changed = false; - } else { - memcpy(hdev->adv_data, pdu->data, len); - hdev->adv_data_len = len; - } - - return 0; -} - -static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - struct hci_cp_le_set_adv_data cp; - u8 len; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return 0; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); -} - -int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance) -{ - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return 0; - - if (ext_adv_capable(hdev)) - return hci_set_ext_adv_data_sync(hdev, instance); - - return hci_set_adv_data_sync(hdev, instance); -} - int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance, bool force) { @@ -1970,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; - int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - err = hci_clear_adv_sets_sync(hdev, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_clear_adv_sets_sync(hdev, sk); /* This is safe as long as there is no command send while the lock is * held. 
@@ -2004,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err = 0; + int err; /* If we use extended advertising, instance has to be removed first. */ if (ext_adv_capable(hdev)) - err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); - if (ext_adv_capable(hdev)) - return err; + return hci_remove_ext_adv_instance_sync(hdev, instance, sk); /* This is safe as long as there is no command send while the lock is * held. @@ -2109,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; - int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - err = hci_disable_ext_adv_instance_sync(hdev, 0x00); - if (ext_adv_capable(hdev)) - return err; + return hci_disable_ext_adv_instance_sync(hdev, 0x00); return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -2481,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev) int err; int old_state; + /* If controller is not advertising we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + /* If already been paused there is nothing to do. */ if (hdev->advertising_paused) return 0; @@ -5449,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_disconnect cp; - if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) { + if (conn->type == BIS_LINK) { /* This is a BIS connection, hci_conn_del will * do the necessary cleanup. 
*/ @@ -6277,6 +6321,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, struct hci_conn *conn) { struct hci_cp_le_set_ext_adv_params cp; + struct hci_rp_le_set_ext_adv_params rp; int err; bdaddr_t random_addr; u8 own_addr_type; @@ -6318,8 +6363,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev, if (err) return err; - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); + err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp); + if (err) + return err; + + /* Update adv data as tx power is known now */ + err = hci_set_ext_adv_data_sync(hdev, cp.handle); if (err) return err; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a5bde5db58ef..40daa38276f3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; struct l2cap_conf_efs efs; u8 remote_efs = 0; - u16 mtu = L2CAP_DEFAULT_MTU; + u16 mtu = 0; u16 result = L2CAP_CONF_SUCCESS; u16 size; @@ -3520,6 +3520,13 @@ done: /* Configure output options and let the other side know * which ones we don't like. */ + /* If MTU is not provided in configure request, use the most recently + * explicitly or implicitly accepted value for the other direction, + * or the default value. + */ + if (mtu == 0) + mtu = chan->imtu ? 
chan->imtu : L2CAP_DEFAULT_MTU; + if (mtu < L2CAP_DEFAULT_MIN_MTU) result = L2CAP_CONF_UNACCEPT; else { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d540f7b4f75f..1485b455ade4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1080,7 +1080,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data) struct mgmt_mesh_tx *mesh_tx; hci_dev_clear_flag(hdev, HCI_MESH_SENDING); - hci_disable_advertising_sync(hdev); + if (list_empty(&hdev->adv_instances)) + hci_disable_advertising_sync(hdev); mesh_tx = mgmt_mesh_next(hdev, NULL); if (mesh_tx) @@ -2153,6 +2154,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) else hci_dev_clear_flag(hdev, HCI_MESH); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); + len -= sizeof(*cp); /* If filters don't fit, forward all adv pkts */ @@ -2167,6 +2171,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_mesh *cp = data; struct mgmt_pending_cmd *cmd; + __u16 period, window; int err = 0; bt_dev_dbg(hdev, "sock %p", sk); @@ -2180,6 +2185,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, MGMT_STATUS_INVALID_PARAMS); + /* Keep allowed ranges in sync with set_scan_params() */ + period = __le16_to_cpu(cp->period); + + if (period < 0x0004 || period > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + window = __le16_to_cpu(cp->window); + + if (window < 0x0004 || window > 0x4000) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + if (window > period) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + hci_dev_lock(hdev); cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); @@ -6432,6 +6454,7 @@ static int 
set_scan_params(struct sock *sk, struct hci_dev *hdev, return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS, MGMT_STATUS_NOT_SUPPORTED); + /* Keep allowed ranges in sync with set_mesh() */ interval = __le16_to_cpu(cp->interval); if (interval < 0x0004 || interval > 0x4000) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 0224ef3dfec0..1377f31b719c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2015,10 +2015,19 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx) { + struct net_bridge *br = pmctx->port->br; + bool del = false; + #if IS_ENABLED(CONFIG_IPV6) timer_delete_sync(&pmctx->ip6_mc_router_timer); #endif timer_delete_sync(&pmctx->ip4_mc_router_timer); + + spin_lock_bh(&br->multicast_lock); + del |= br_ip6_multicast_rport_del(pmctx); + del |= br_ip4_multicast_rport_del(pmctx); + br_multicast_rport_del_notify(pmctx, del); + spin_unlock_bh(&br->multicast_lock); } int br_multicast_add_port(struct net_bridge_port *port) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4ddb7490df4b..6ad84d4a2b46 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -432,6 +432,7 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); + udph->check = 0; if (np->ipv6) { udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, @@ -460,7 +461,6 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { - udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, diff --git a/net/core/selftests.c b/net/core/selftests.c index 35f807ea9952..406faf8e5f3f 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -160,8 +160,9 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, 
skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; if (attr->tcp) { - thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, - ihdr->daddr, 0); + int l4len = skb->len - skb_transport_offset(skb); + + thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 85fc82f72d26..d6420b74ea9c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6261,9 +6261,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) if (!pskb_may_pull(skb, write_len)) return -ENOMEM; - if (!skb_frags_readable(skb)) - return -EFAULT; - if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 30a5e9460d00..5a49eb99e5c4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -319,8 +319,8 @@ static int ip_rcv_finish_core(struct net *net, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); - int err, drop_reason; struct rtable *rt; + int drop_reason; if (ip_can_use_hint(skb, iph, hint)) { drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, @@ -345,9 +345,10 @@ static int ip_rcv_finish_core(struct net *net, break; case IPPROTO_UDP: if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { - err = udp_v4_early_demux(skb); - if (unlikely(err)) + drop_reason = udp_v4_early_demux(skb); + if (unlikely(drop_reason)) goto drop_error; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f64f8276a73c..461a9ab540af 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1176,7 +1176,7 @@ restart: goto do_error; while (msg_data_left(msg)) { - ssize_t copy = 0; + int copy = 0; skb = tcp_write_queue_tail(sk); if (skb) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 
9b83d639b5ac..5107121c5e37 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -3,6 +3,7 @@ #include <linux/tcp.h> #include <linux/rcupdate.h> #include <net/tcp.h> +#include <net/busy_poll.h> void tcp_fastopen_init_key_once(struct net *net) { @@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); + sk_mark_napi_id_set(child, skb); + /* Now finish processing the fastopen child socket. */ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ec92dec321a..68bc79eb9019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2479,20 +2479,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; - if (tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) - return true; /* got echoed TS before first retransmission */ - - /* Check if nothing was retransmitted (retrans_stamp==0), which may - * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp - * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear - * retrans_stamp even if we had retransmitted the SYN. + /* Received an echoed timestamp before the first retransmission? */ + if (tp->retrans_stamp) + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + + /* We set tp->retrans_stamp upon the first retransmission of a loss + * recovery episode, so normally if tp->retrans_stamp is 0 then no + * retransmission has happened yet (likely due to TSQ, which can cause + * fast retransmits to be delayed). So if snd_una advanced while + * (tp->retrans_stamp is 0 then apparently a packet was merely delayed, + * not lost. But there are exceptions where we retransmit but then + * clear tp->retrans_stamp, so we check for those exceptions. */ - if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ - sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? 
*/ - return true; /* nothing was retransmitted */ - return false; + /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() + * clears tp->retrans_stamp when snd_una == high_seq. + */ + if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) + return false; + + /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp + * when setting FLAG_SYN_ACKED is set, even if the SYN was + * retransmitted. + */ + if (sk->sk_state == TCP_SYN_SENT) + return false; + + return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. */ @@ -5168,7 +5181,9 @@ end: skb_condense(skb); skb_set_owner_r(skb, sk); } - tcp_rcvbuf_grow(sk); + /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ + if (sk->sk_socket) + tcp_rcvbuf_grow(sk); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ba2ec7c870cc..870a0bd6c2ba 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3525,11 +3525,9 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - idev = ipv6_find_idev(dev); - if (IS_ERR(idev)) { - pr_debug("%s: add_dev failed\n", __func__); + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) return; - } /* Generate the IPv6 link-local address using addrconf_addr_gen(), * unless we have an IPv4 GRE device not bound to an IP address and @@ -3543,9 +3541,6 @@ static void addrconf_gre_config(struct net_device *dev) } add_v4_addrs(idev); - - if (dev->flags & IFF_POINTOPOINT) - addrconf_add_mroute(dev); } #endif diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 62618a058b8f..a247bb93908b 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req, struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return -ENOMEM; + if (req_inet->ipv6_opt && 
req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else @@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req) struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return; + if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9d88f2f2831..954795b0fe48 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1959,6 +1959,20 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, ieee80211_sta_init_nss(link_sta); if (params->opmode_notif_used) { + enum nl80211_chan_width width = link->conf->chanreq.oper.width; + + switch (width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_160: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_320: /* not VHT, allowed for HE/EHT */ + break; + default: + return -EINVAL; + } + /* returned value is only needed for rc update, but the * rc isn't initialized here yet, so ignore it */ diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 5b81998cb0c9..ef7c1a68d88d 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -1,10 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions - * Copyright (C) 2022 - 2024 Intel Corporation + * Copyright (C) 2022 - 2025 Intel Corporation */ #ifndef __MAC80211_DEBUG_H #define __MAC80211_DEBUG_H +#include <linux/once_lite.h> #include <net/cfg80211.h> #ifdef CONFIG_MAC80211_OCB_DEBUG @@ -152,6 +153,8 @@ do { \ else \ _sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \ } while (0) +#define link_err_once(link, fmt, ...) \ + DO_ONCE_LITE(link_err, link, fmt, ##__VA_ARGS__) #define link_id_info(sdata, link_id, fmt, ...) 
\ do { \ if (ieee80211_vif_is_mld(&sdata->vif)) \ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7c27f3cd841c..c01634fdba78 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1150,6 +1150,8 @@ static void ieee80211_sdata_init(struct ieee80211_local *local, { sdata->local = local; + INIT_LIST_HEAD(&sdata->key_list); + /* * Initialize the default link, so we can use link_id 0 for non-MLD, * and that continues to work for non-MLD-aware drivers that use just @@ -2210,8 +2212,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_init_frag_cache(&sdata->frags); - INIT_LIST_HEAD(&sdata->key_list); - wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, ieee80211_delayed_tailroom_dec); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d40c2bd3b50b..4f7b7d0f64f2 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -93,9 +93,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, if (link_id < 0) link_id = 0; - rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); - rcu_assign_pointer(sdata->link[link_id], link); - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { struct ieee80211_sub_if_data *ap_bss; struct ieee80211_bss_conf *ap_bss_conf; @@ -145,6 +142,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ieee80211_link_debugfs_add(link); } + + rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf); + rcu_assign_pointer(sdata->link[link_id], link); } void ieee80211_link_stop(struct ieee80211_link_data *link) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d46d4af60d7..0ed68182f79b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3934,6 +3934,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, lockdep_assert_wiphy(local->hw.wiphy); + if (frame_buf) + memset(frame_buf, 0, IEEE80211_DEAUTH_FRAME_LEN); + if (WARN_ON(!ap_sta)) return; @@ -7195,6 +7198,7 @@ static void ieee80211_rx_mgmt_beacon(struct 
ieee80211_link_data *link, struct ieee80211_bss_conf *bss_conf = link->conf; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; struct ieee80211_mgmt *mgmt = (void *) hdr; + struct ieee80211_ext *ext = NULL; size_t baselen; struct ieee802_11_elems *elems; struct ieee80211_local *local = sdata->local; @@ -7220,7 +7224,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - struct ieee80211_ext *ext = (void *) mgmt; + ext = (void *)mgmt; variable = ext->u.s1g_beacon.variable + ieee80211_s1g_optional_len(ext->frame_control); } @@ -7407,7 +7411,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) || - ieee80211_is_s1g_short_beacon(mgmt->frame_control)) + (ext && ieee80211_is_s1g_short_beacon(ext->frame_control, + parse_params.start, + parse_params.len))) goto free; link->u.mgd.beacon_crc = ncrc; link->u.mgd.beacon_crc_valid = true; @@ -10699,8 +10705,8 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata, */ for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs, elems->ml_epcs_len) { + struct ieee802_11_elems *link_elems __free(kfree) = NULL; struct ieee80211_link_data *link; - struct ieee802_11_elems *link_elems __free(kfree); u8 *pos = (void *)sub->data; u16 control; ssize_t len; diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 96584b39215e..c5e0f7f46004 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, { const struct element *elem, *sub; size_t profile_len = 0; - bool found = false; if (!bss || !bss->transmitted_bss) return profile_len; @@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, index[2], new_bssid); if 
(ether_addr_equal(new_bssid, bss->bssid)) { - found = true; elems->bssid_index_len = index[1]; elems->bssid_index = (void *)&index[2]; - break; + return profile_len; } } } - return found ? profile_len : 0; + return 0; } static void diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 09beb65d6108..e73431549ce7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4432,6 +4432,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!multicast && !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) return false; + /* reject invalid/our STA address */ + if (!is_valid_ether_addr(hdr->addr2) || + ether_addr_equal(sdata->dev->dev_addr, hdr->addr2)) + return false; if (!rx->sta) { int rate_idx; if (status->encoding != RX_ENC_LEGACY) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d8d4f3d7d7f2..d58b80813bdd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * Transmit and frame generation functions. */ @@ -5016,12 +5016,25 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, } } -static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon) +static u8 __ieee80211_beacon_update_cntdwn(struct ieee80211_link_data *link, + struct beacon_data *beacon) { - beacon->cntdwn_current_counter--; + if (beacon->cntdwn_current_counter == 1) { + /* + * Channel switch handling is done by a worker thread while + * beacons get pulled from hardware timers. It's therefore + * possible that software threads are slow enough to not be + * able to complete CSA handling in a single beacon interval, + * in which case we get here. There isn't much to do about + * it, other than letting the user know that the AP isn't + * behaving correctly. 
+ */ + link_err_once(link, + "beacon TX faster than countdown (channel/color switch) completion\n"); + return 0; + } - /* the counter should never reach 0 */ - WARN_ON_ONCE(!beacon->cntdwn_current_counter); + beacon->cntdwn_current_counter--; return beacon->cntdwn_current_counter; } @@ -5052,7 +5065,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_i if (!beacon) goto unlock; - count = __ieee80211_beacon_update_cntdwn(beacon); + count = __ieee80211_beacon_update_cntdwn(link, beacon); unlock: rcu_read_unlock(); @@ -5450,7 +5463,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->cntdwn_counter_offsets[0]) { if (!is_template) - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } @@ -5482,7 +5495,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, * for now we leave it consistent with overall * mac80211's behavior. */ - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 27d414efa3fd..e66da651678a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2144,11 +2144,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); wake_up: - - if (local->virt_monitors > 0 && - local->virt_monitors == local->open_count) - ieee80211_add_virtual_monitor(local); - /* * Clear the WLAN_STA_BLOCK_BA flag so new aggregation * sessions can be established after a resume. 
@@ -2202,6 +2197,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } + if (local->virt_monitors > 0 && + local->virt_monitors == local->open_count) + ieee80211_add_virtual_monitor(local); + if (!suspended) return 0; @@ -3884,7 +3883,7 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local, { u64 tsf = drv_get_tsf(local, sdata); u64 dtim_count = 0; - u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; + u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024; u8 dtim_period = sdata->vif.bss_conf.dtim_period; struct ps_data *ps; u8 bcns_from_dtim; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d536c97144e9..47d7dfd9ad09 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) if (index < net->mpls.platform_labels) { struct mpls_route __rcu **platform_label = - rcu_dereference(net->mpls.platform_label); - rt = rcu_dereference(platform_label[index]); + rcu_dereference_rtnl(net->mpls.platform_label); + rt = rcu_dereference_rtnl(platform_label[index]); } return rt; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e8972a857e51..6332a0e06596 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) WARN_ON(skb->sk != NULL); skb->sk = sk; skb->destructor = netlink_skb_destructor; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); sk_mem_charge(sk, skb->truesize); } @@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { + DECLARE_WAITQUEUE(wait, current); struct netlink_sock *nlk; + unsigned int rmem; nlk = nlk_sk(sk); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - 
DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) - netlink_overrun(sk); - sock_put(sk); - kfree_skb(skb); - return -EAGAIN; - } - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&nlk->wait, &wait); + if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) && + !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { + netlink_skb_set_owner_r(skb, sk); + return 0; + } - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - test_bit(NETLINK_S_CONGESTED, &nlk->state)) && - !sock_flag(sk, SOCK_DEAD)) - *timeo = schedule_timeout(*timeo); + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&nlk->wait, &wait); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) + netlink_overrun(sk); sock_put(sk); + kfree_skb(skb); + return -EAGAIN; + } - if (signal_pending(current)) { - kfree_skb(skb); - return sock_intr_errno(*timeo); - } - return 1; + __set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&nlk->wait, &wait); + rmem = atomic_read(&sk->sk_rmem_alloc); + + if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) || + test_bit(NETLINK_S_CONGESTED, &nlk->state)) && + !sock_flag(sk, SOCK_DEAD)) + *timeo = schedule_timeout(*timeo); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&nlk->wait, &wait); + sock_put(sk); + + if (signal_pending(current)) { + kfree_skb(skb); + return sock_intr_errno(*timeo); } - netlink_skb_set_owner_r(skb, sk); - return 0; + + return 1; } static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) @@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { ret = skb->len; + atomic_add(skb->truesize, &sk->sk_rmem_alloc); netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; netlink_deliver_tap_kernel(sk, ssk, skb); @@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check); static int netlink_broadcast_deliver(struct 
sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); + unsigned int rmem, rcvbuf; - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + + if ((rmem == skb->truesize || rmem <= rcvbuf) && !test_bit(NETLINK_S_CONGESTED, &nlk->state)) { netlink_skb_set_owner_r(skb, sk); __netlink_sendskb(sk, skb); - return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + return rmem > (rcvbuf >> 1); } + + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); return -1; } @@ -2245,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + unsigned int rmem, rcvbuf; size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; @@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; } - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - /* NLMSG_GOODSIZE is small to avoid high order allocations being * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user @@ -2283,6 +2294,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken) if (!skb) goto errout_skb; + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc); + if (rmem != skb->truesize && rmem >= rcvbuf) { + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + goto errout_skb; + } + /* Trim skb to allocated size. User is expected to provide buffer as * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). 
dump will pack as many smaller messages as diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index ed1508a9e093..aab107727f18 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); nu->tty = tty; - tty->disc_data = nu; skb_queue_head_init(&nu->tx_q); INIT_WORK(&nu->write_work, nci_uart_write_work); spin_lock_init(&nu->rx_lock); ret = nu->ops.open(nu); if (ret) { - tty->disc_data = NULL; kfree(nu); + return ret; } else if (!try_module_get(nu->owner)) { nu->ops.close(nu); - tty->disc_data = NULL; kfree(nu); return -ENOENT; } - return ret; + tty->disc_data = nu; + + return 0; } /* ------ LDISC part ------ */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e7269a3eec79..3add108340bf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -39,16 +39,14 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { - .bh_lock = INIT_LOCAL_LOCK(bh_lock), -}; +struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. 
*/ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); struct action_flow_keys *keys = &ovs_pcpu->flow_keys; int level = ovs_pcpu->exec_level; struct sw_flow_key *key = NULL; @@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, const struct nlattr *actions, const int actions_len) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); struct deferred_action *da; da = action_fifo_put(fifo); @@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { @@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; - data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); data->dst = skb->_skb_refdst; data->vport = vport; data->cb = *OVS_CB(skb); @@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, clone = clone_flow_key ? 
clone_key(key) : key; if (clone) { int err = 0; - if (actions) { /* Sample action */ if (clone_flow_key) - __this_cpu_inc(ovs_pcpu_storage.exec_level); + __this_cpu_inc(ovs_pcpu_storage->exec_level); err = do_execute_actions(dp, skb, clone, actions, len); if (clone_flow_key) - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); } else { /* Recirc action */ clone->recirc_id = recirc_id; ovs_dp_process_packet(skb, clone); @@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, static void process_deferred_actions(struct datapath *dp) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); /* Do not touch the FIFO in case there is no deferred actions. */ if (action_fifo_is_empty(fifo)) @@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, { int err, level; - level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level); + level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level); if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); @@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, process_deferred_actions(dp); out: - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); return err; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6a304ae2d959..b990dc83504f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. 
*/ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; @@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) * avoided. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); ovs_pcpu->owner = current; ovs_pcpu_locked = true; } @@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_dp_name(dp), error); if (ovs_pcpu_locked) { ovs_pcpu->owner = NULL; - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); } stats_counter = &stats->n_hit; @@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, current); + this_cpu_write(ovs_pcpu_storage->owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, NULL); - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + this_cpu_write(ovs_pcpu_storage->owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); local_bh_enable(); rcu_read_unlock(); @@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = { .n_reasons = ARRAY_SIZE(ovs_drop_reasons), }; +static int __init ovs_alloc_percpu_storage(void) +{ + unsigned int cpu; + + ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage); + if (!ovs_pcpu_storage) + return -ENOMEM; + + 
for_each_possible_cpu(cpu) { + struct ovs_pcpu_storage *ovs_pcpu; + + ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu); + local_lock_init(&ovs_pcpu->bh_lock); + } + return 0; +} + +static void ovs_free_percpu_storage(void) +{ + free_percpu(ovs_pcpu_storage); +} + static int __init dp_init(void) { int err; @@ -2753,6 +2775,10 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); + err = ovs_alloc_percpu_storage(); + if (err) + goto error; + err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; @@ -2799,6 +2825,7 @@ error_flow_exit: error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); error: + ovs_free_percpu_storage(); return err; } @@ -2813,6 +2840,7 @@ static void dp_cleanup(void) ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); + ovs_free_percpu_storage(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1b5348b0f559..cfeb817a1889 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -220,7 +220,8 @@ struct ovs_pcpu_storage { struct task_struct *owner; local_lock_t bh_lock; }; -DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + +extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /** * enum ovs_pkt_hash_types - hash info to include with a packet diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 2dd6bd3a3011..b72bf8a08d48 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev) t = rose_node; rose_node = rose_node->next; - for (i = 0; i < t->count; i++) { + for (i = t->count - 1; i >= 0; i--) { if (t->neighbour[i] != s) continue; t->count--; - switch (i) { - case 0: - t->neighbour[0] = t->neighbour[1]; - fallthrough; - case 1: - t->neighbour[1] = t->neighbour[2]; - break; - case 2: - break; - } + memmove(&t->neighbour[i], &t->neighbour[i + 1], + sizeof(t->neighbour[0]) * + (t->count - i)); } if (t->count <= 0) 
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5bd3922c310d..376e33dce8c1 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -361,12 +361,15 @@ struct rxrpc_local { struct list_head new_client_calls; /* Newly created client calls need connection */ spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ - /* Provide a kvec table sufficiently large to manage either a DATA - * packet with a maximum set of jumbo subpackets or a PING ACK padded - * out to 64K with zeropages for PMTUD. - */ - struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? - 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + union { + /* Provide a kvec table sufficiently large to manage either a + * DATA packet with a maximum set of jumbo subpackets or a PING + * ACK padded out to 64K with zeropages for PMTUD. + */ + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct bio_vec bvec[3 + 16]; + }; }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a4b363b47cca..49fccee1a726 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, id_in_use: write_unlock(&rx->call_lock); + rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; @@ -254,6 +255,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; + if (!b) + return NULL; + /* #calls >= #conns >= #peers must hold true. 
*/ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 0af19bcdc80a..ef7b3096c95e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -924,7 +924,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response { struct rxrpc_skb_priv *sp = rxrpc_skb(response); struct scatterlist sg[16]; - struct bio_vec bvec[16]; + struct bio_vec *bvec = conn->local->bvec; struct msghdr msg; size_t len = sp->resp.len; __be32 wserial; @@ -938,6 +938,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response if (ret < 0) goto fail; nr_sg = ret; + ret = -EIO; + if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec))) + goto fail; for (int i = 0; i < nr_sg; i++) bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c5e3673aadbe..d7c767b861a4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -336,17 +336,22 @@ out: return q; } -static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) +static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, + struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; - if (cops == NULL) - return NULL; + if (cops == NULL) { + NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); + return ERR_PTR(-EOPNOTSUPP); + } cl = cops->find(p, classid); - if (cl == 0) - return NULL; + if (cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class not found"); + return ERR_PTR(-ENOENT); + } return cops->leaf(p, cl); } @@ -596,16 +601,6 @@ out: qdisc_skb_cb(skb)->pkt_len = pkt_len; } -void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) -{ - if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); - qdisc->flags |= TCQ_F_WARN_NONWC; - } -} -EXPORT_SYMBOL(qdisc_warn_nonwc); - static enum 
hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, @@ -780,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { - bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; - if (n == 0 && len == 0) - return; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { @@ -797,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) if (sch->flags & TCQ_F_NOPARENT) break; - /* Notify parent qdisc only if child qdisc becomes empty. - * - * If child was empty even before update then backlog - * counter is screwed and we skip notification because - * parent class is already passive. - * - * If the original child was offloaded then it is allowed - * to be seem as empty, so the parent is notified anyway. - */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n && - !qdisc_is_offloaded); + /* Notify parent qdisc only if child qdisc becomes empty. */ + notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -816,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { + /* Note that qlen_notify must be idempotent as it may get called + * multiple times. 
+ */ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } @@ -1499,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } @@ -1510,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } + if (IS_ERR(q)) + return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); @@ -1611,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } - q = qdisc_leaf(p, clid); + q = qdisc_leaf(p, clid, extack); + if (IS_ERR(q)) + return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 5a7745170e84..d8fd35da32a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static unsigned int -qdisc_peek_len(struct Qdisc *sch) -{ - struct sk_buff *skb; - unsigned int len; - - skb = sch->ops->peek(sch); - if (unlikely(skb == NULL)) { - qdisc_warn_nonwc("qdisc_peek_len", sch); - return 0; - } - len = qdisc_pkt_len(skb); - - return len; -} - static void hfsc_adjust_levels(struct hfsc_class *cl) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bf1282cb22eb..bcce36608871 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -989,7 +989,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ list_del_init(&cl->alist); - 
else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 14021b812329..2b14c81a87e5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1328,13 +1328,15 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, stab = rtnl_dereference(q->root->stab); - oper = rtnl_dereference(q->oper_sched); + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); if (oper) taprio_update_queue_max_sdu(q, oper, stab); - admin = rtnl_dereference(q->admin_sched); + admin = rcu_dereference(q->admin_sched); if (admin) taprio_update_queue_max_sdu(q, admin, stab); + rcu_read_unlock(); break; } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 0fa244f16876..7b943fbafcc3 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1724,7 +1724,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len); /* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */ while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count)) - maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len); + maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 939b6239df8a..9c93b854e809 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -638,8 +638,6 @@ EXPORT_SYMBOL_GPL(svc_destroy); static bool svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { - unsigned long ret; - rqstp->rq_maxpages = svc_serv_maxpages(serv); /* rq_pages' last entry 
is NULL for historical reasons. */ @@ -649,9 +647,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) if (!rqstp->rq_pages) return false; - ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages, - rqstp->rq_pages); - return ret == rqstp->rq_maxpages; + return true; } /* @@ -1375,7 +1371,8 @@ svc_process_common(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - goto err_garbage_args; + rqstp->rq_auth_stat = rpc_autherr_badcred; + goto err_bad_auth; case SVC_SYSERR: goto err_system_err; case SVC_DENIED: @@ -1516,14 +1513,6 @@ err_bad_proc: *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; -err_garbage_args: - svc_printk(rqstp, "failed to decode RPC header\n"); - - if (serv->sv_stats) - serv->sv_stats->rpcbadfmt++; - *rqstp->rq_accept_statp = rpc_garbage_args; - goto sendit; - err_system_err: if (serv->sv_stats) serv->sv_stats->rpcbadfmt++; diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 8ee0c07d00e9..ffe577bf6b51 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net) for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { + conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); + conn_put(con); spin_lock_bh(&srv->idr_lock); } } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 108a4cc2e001..258d6aa4f21a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = tipc_bearer_find(net, bname); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } @@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } diff 
--git a/net/unix/af_unix.c b/net/unix/af_unix.c index 22e170fb5dda..52b155123985 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -660,6 +660,11 @@ static void unix_sock_destructor(struct sock *sk) #endif } +static unsigned int unix_skb_len(const struct sk_buff *skb) +{ + return skb->len - UNIXCB(skb).consumed; +} + static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); @@ -694,10 +699,16 @@ static void unix_release_sock(struct sock *sk, int embrion) if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb && !unix_skb_len(skb)) + skb = skb_peek_next(skb, &sk->sk_receive_queue); +#endif unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) + if (skb || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -2661,11 +2672,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, return timeo; } -static unsigned int unix_skb_len(const struct sk_buff *skb) -{ - return skb->len - UNIXCB(skb).consumed; -} - struct unix_stream_read_state { int (*recv_actor)(struct sk_buff *, int, int, struct unix_stream_read_state *); @@ -2680,11 +2686,11 @@ struct unix_stream_read_state { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) static int unix_stream_recv_urg(struct unix_stream_read_state *state) { + struct sk_buff *oob_skb, *read_skb = NULL; struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int chunk = 1; - struct sk_buff *oob_skb; mutex_lock(&u->iolock); unix_state_lock(sk); @@ -2699,9 +2705,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) oob_skb = u->oob_skb; - if (!(state->flags & MSG_PEEK)) + if (!(state->flags & MSG_PEEK)) { WRITE_ONCE(u->oob_skb, 
NULL); + if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue && + !unix_skb_len(oob_skb->prev)) { + read_skb = oob_skb->prev; + __skb_unlink(read_skb, &sk->sk_receive_queue); + } + } + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); @@ -2712,6 +2725,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_unlock(&u->iolock); + consume_skb(read_skb); + if (chunk < 0) return -EFAULT; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 2e7a3034e965..1053662725f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we @@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. 
*/ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || @@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); +/* + * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks. + * Otherwise we may race with module removal. Do not use on `vsk->transport`. + */ +static u32 vsock_registered_transport_cid(const struct vsock_transport **transport) +{ + u32 cid = VMADDR_CID_ANY; + + mutex_lock(&vsock_register_mutex); + if (*transport) + cid = (*transport)->get_local_cid(); + mutex_unlock(&vsock_register_mutex); + + return cid; +} + bool vsock_find_cid(unsigned int cid) { - if (transport_g2h && cid == transport_g2h->get_local_cid()) + if (cid == vsock_registered_transport_cid(&transport_g2h)) return true; if (transport_h2g && cid == VMADDR_CID_HOST) @@ -2536,18 +2570,19 @@ static long vsock_dev_do_ioctl(struct file *filp, unsigned int cmd, void __user *ptr) { u32 __user *p = ptr; - u32 cid = VMADDR_CID_ANY; int retval = 0; + u32 cid; switch (cmd) { case IOCTL_VM_SOCKETS_GET_LOCAL_CID: /* To be compatible with the VMCI behavior, we prioritize the * guest CID instead of well-know host CID (VMADDR_CID_HOST). 
*/ - if (transport_g2h) - cid = transport_g2h->get_local_cid(); - else if (transport_h2g) - cid = transport_h2g->get_local_cid(); + cid = vsock_registered_transport_cid(&transport_g2h); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_h2g); + if (cid == VMADDR_CID_ANY) + cid = vsock_registered_transport_cid(&transport_local); if (put_user(cid, p) != 0) retval = -EFAULT; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b370070194fa..7eccd6708d66 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, u16 proto, struct vmci_handle handle) { + memset(pkt, 0, sizeof(*pkt)); + /* We register the stream control handler as an any cid handle so we * must always send from a source address of VMADDR_CID_ANY */ @@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->type = type; pkt->src_port = src->svm_port; pkt->dst_port = dst->svm_port; - memset(&pkt->proto, 0, sizeof(pkt->proto)); - memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2)); switch (pkt->type) { case VMCI_TRANSPORT_PACKET_TYPE_INVALID: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 85f139016da2..50202d170f3a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr, unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; + const struct ieee80211_ext *ext; unsigned int fixedlen, hdrlen; bool s1g_bcn; @@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr, s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { - fixedlen = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + ext = (struct ieee80211_ext *)mgmt; + fixedlen = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + 
ieee80211_s1g_optional_len(ext->frame_control); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, diff --git a/net/wireless/util.c b/net/wireless/util.c index ed868c0f7ca8..1ad5a6bdfd75 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr) } EXPORT_SYMBOL(ieee80211_is_valid_amsdu); + +/* + * Detects if an MSDU frame was maliciously converted into an A-MSDU + * frame by an adversary. This is done by parsing the received frame + * as if it were a regular MSDU, even though the A-MSDU flag is set. + * + * For non-mesh interfaces, detection involves checking whether the + * payload, when interpreted as an MSDU, begins with a valid RFC1042 + * header. This is done by comparing the A-MSDU subheader's destination + * address to the start of the RFC1042 header. + * + * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field + * and an optional variable-length Mesh Address Extension field before + * the RFC1042 header. The position of the RFC1042 header must therefore + * be calculated based on the mesh header length. + * + * Since this function intentionally parses an A-MSDU frame as an MSDU, + * it only assumes that the A-MSDU subframe header is present, and + * beyond this it performs its own bounds checks under the assumption + * that the frame is instead parsed as a non-aggregated MSDU. 
+ */ +static bool +is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb, + enum nl80211_iftype iftype) +{ + int offset; + + /* Non-mesh case can be directly compared */ + if (iftype != NL80211_IFTYPE_MESH_POINT) + return ether_addr_equal(eth->h_dest, rfc1042_header); + + offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]); + if (offset == 6) { + /* Mesh case with empty address extension field */ + return ether_addr_equal(eth->h_source, rfc1042_header); + } else if (offset + ETH_ALEN <= skb->len) { + /* Mesh case with non-empty address extension field */ + u8 temp[ETH_ALEN]; + + skb_copy_bits(skb, offset, temp, ETH_ALEN); + return ether_addr_equal(temp, rfc1042_header); + } + + return false; +} + void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, const unsigned int extra_headroom, @@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, /* the last MSDU has no padding */ if (subframe_len > remaining) goto purge; - /* mitigate A-MSDU aggregation injection attacks */ - if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header)) + /* mitigate A-MSDU aggregation injection attacks, to be + * checked when processing first subframe (offset == 0). 
+ */ + if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype)) goto purge; offset += sizeof(struct ethhdr); diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index bc494745f67b..8cbb660e2ec2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -39,6 +39,7 @@ #include <linux/blk_types.h> #include <linux/blkdev.h> #include <linux/clk.h> +#include <linux/completion.h> #include <linux/configfs.h> #include <linux/cpu.h> #include <linux/cpufreq.h> diff --git a/rust/helpers/completion.c b/rust/helpers/completion.c new file mode 100644 index 000000000000..b2443262a2ae --- /dev/null +++ b/rust/helpers/completion.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/completion.h> + +void rust_helper_init_completion(struct completion *x) +{ + init_completion(x); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 16fa9bca5949..b15b3cddad4e 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -13,6 +13,7 @@ #include "build_assert.c" #include "build_bug.c" #include "clk.c" +#include "completion.c" #include "cpu.c" #include "cpufreq.c" #include "cpumask.c" diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 0f79a2ec9474..57502534d985 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -12,26 +12,28 @@ use crate::{ error::{Error, Result}, ffi::c_void, prelude::*, - revocable::Revocable, - sync::Arc, + revocable::{Revocable, RevocableGuard}, + sync::{rcu, Arc, Completion}, types::ARef, }; -use core::ops::Deref; - #[pin_data] struct DevresInner<T> { dev: ARef<Device>, callback: unsafe extern "C" fn(*mut c_void), #[pin] data: Revocable<T>, + #[pin] + revoke: Completion, } /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to /// manage their lifetime. 
/// /// [`Device`] bound resources should be freed when either the resource goes out of scope or the -/// [`Device`] is unbound respectively, depending on what happens first. +/// [`Device`] is unbound respectively, depending on what happens first. In any case, it is always +/// guaranteed that revoking the device resource is completed before the corresponding [`Device`] +/// is unbound. /// /// To achieve that [`Devres`] registers a devres callback on creation, which is called once the /// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]). @@ -102,6 +104,7 @@ impl<T> DevresInner<T> { dev: dev.into(), callback: Self::devres_callback, data <- Revocable::new(data), + revoke <- Completion::new(), }), flags, )?; @@ -130,26 +133,28 @@ impl<T> DevresInner<T> { self as _ } - fn remove_action(this: &Arc<Self>) { + fn remove_action(this: &Arc<Self>) -> bool { // SAFETY: // - `self.inner.dev` is a valid `Device`, // - the `action` and `data` pointers are the exact same ones as given to devm_add_action() // previously, // - `self` is always valid, even if the action has been released already. - let ret = unsafe { + let success = unsafe { bindings::devm_remove_action_nowarn( this.dev.as_raw(), Some(this.callback), this.as_ptr() as _, ) - }; + } == 0; - if ret == 0 { + if success { // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership // of this reference. let _ = unsafe { Arc::from_raw(this.as_ptr()) }; } + + success } #[allow(clippy::missing_safety_doc)] @@ -161,7 +166,12 @@ impl<T> DevresInner<T> { // `DevresInner::new`. let inner = unsafe { Arc::from_raw(ptr) }; - inner.data.revoke(); + if !inner.data.revoke() { + // If `revoke()` returns false, it means that `Devres::drop` already started revoking + // `inner.data` for us. 
Hence we have to wait until `Devres::drop()` signals that it + // completed revoking `inner.data`. + inner.revoke.wait_for_completion(); + } } } @@ -218,20 +228,36 @@ impl<T> Devres<T> { // SAFETY: `dev` being the same device as the device this `Devres` has been created for // proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as // long as `dev` lives; `dev` lives at least as long as `self`. - Ok(unsafe { self.deref().access() }) + Ok(unsafe { self.0.data.access() }) } -} -impl<T> Deref for Devres<T> { - type Target = Revocable<T>; + /// [`Devres`] accessor for [`Revocable::try_access`]. + pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> { + self.0.data.try_access() + } + + /// [`Devres`] accessor for [`Revocable::try_access_with`]. + pub fn try_access_with<R, F: FnOnce(&T) -> R>(&self, f: F) -> Option<R> { + self.0.data.try_access_with(f) + } - fn deref(&self) -> &Self::Target { - &self.0.data + /// [`Devres`] accessor for [`Revocable::try_access_with_guard`]. + pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> { + self.0.data.try_access_with_guard(guard) } } impl<T> Drop for Devres<T> { fn drop(&mut self) { - DevresInner::remove_action(&self.0); + // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data + // anymore, hence it is safe not to wait for the grace period to finish. + if unsafe { self.0.data.revoke_nosync() } { + // We revoked `self.0.data` before the devres action did, hence try to remove it. + if !DevresInner::remove_action(&self.0) { + // We could not remove the devres action, which means that it now runs concurrently, + // hence signal that `self.0.data` has been revoked successfully. 
+ self.0.revoke.complete_all(); + } + } } } diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..14c1aa402951 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -66,7 +66,7 @@ impl<T: drm::Driver> Device<T> { open: Some(drm::File::<T::File>::open_callback), postclose: Some(drm::File::<T::File>::postclose_callback), unload: None, - release: None, + release: Some(Self::release), master_set: None, master_drop: None, debugfs_init: None, @@ -162,6 +162,16 @@ impl<T: drm::Driver> Device<T> { // SAFETY: `ptr` is valid by the safety requirements of this function. unsafe { &*ptr.cast() } } + + extern "C" fn release(ptr: *mut bindings::drm_device) { + // SAFETY: `ptr` is a valid pointer to a `struct drm_device` and embedded in `Self`. + let this = unsafe { Self::from_drm_device(ptr) }; + + // SAFETY: + // - When `release` runs it is guaranteed that there is no further access to `this`. + // - `this` is valid for dropping. + unsafe { core::ptr::drop_in_place(this) }; + } } impl<T: drm::Driver> Deref for Device<T> { diff --git a/rust/kernel/drm/driver.rs b/rust/kernel/drm/driver.rs index acb638086131..af93d46d03d3 100644 --- a/rust/kernel/drm/driver.rs +++ b/rust/kernel/drm/driver.rs @@ -10,7 +10,6 @@ use crate::{ drm, error::{to_result, Result}, prelude::*, - str::CStr, types::ARef, }; use macros::vtable; diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs index db4aa46bb121..06a3cdfce344 100644 --- a/rust/kernel/revocable.rs +++ b/rust/kernel/revocable.rs @@ -154,8 +154,10 @@ impl<T> Revocable<T> { /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. 
- unsafe fn revoke_internal<const SYNC: bool>(&self) { - if self.is_available.swap(false, Ordering::Relaxed) { + unsafe fn revoke_internal<const SYNC: bool>(&self) -> bool { + let revoke = self.is_available.swap(false, Ordering::Relaxed); + + if revoke { if SYNC { // SAFETY: Just an FFI call, there are no further requirements. unsafe { bindings::synchronize_rcu() }; @@ -165,6 +167,8 @@ impl<T> Revocable<T> { // `compare_exchange` above that takes `is_available` from `true` to `false`. unsafe { drop_in_place(self.data.get()) }; } + + revoke } /// Revokes access to and drops the wrapped object. @@ -172,10 +176,13 @@ impl<T> Revocable<T> { /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`], /// expecting that there are no concurrent users of the object. /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + /// /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. - pub unsafe fn revoke_nosync(&self) { + pub unsafe fn revoke_nosync(&self) -> bool { // SAFETY: By the safety requirement of this function, the caller ensures that nobody is // accessing the data anymore and hence we don't have to wait for the grace period to // finish. @@ -189,7 +196,10 @@ impl<T> Revocable<T> { /// If there are concurrent users of the object (i.e., ones that called /// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this /// function waits for the concurrent access to complete before dropping the wrapped object. - pub fn revoke(&self) { + /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + pub fn revoke(&self) -> bool { // SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to // finish. 
unsafe { self.revoke_internal::<true>() } diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 36a719015583..c23a12639924 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -10,6 +10,7 @@ use crate::types::Opaque; use pin_init; mod arc; +pub mod completion; mod condvar; pub mod lock; mod locked_by; @@ -17,6 +18,7 @@ pub mod poll; pub mod rcu; pub use arc::{Arc, ArcBorrow, UniqueArc}; +pub use completion::Completion; pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy}; pub use lock::mutex::{new_mutex, Mutex, MutexGuard}; diff --git a/rust/kernel/sync/completion.rs b/rust/kernel/sync/completion.rs new file mode 100644 index 000000000000..c50012a940a3 --- /dev/null +++ b/rust/kernel/sync/completion.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Completion support. +//! +//! Reference: <https://docs.kernel.org/scheduler/completion.html> +//! +//! C header: [`include/linux/completion.h`](srctree/include/linux/completion.h) + +use crate::{bindings, prelude::*, types::Opaque}; + +/// Synchronization primitive to signal when a certain task has been completed. +/// +/// The [`Completion`] synchronization primitive signals when a certain task has been completed by +/// waking up other tasks that have been queued up to wait for the [`Completion`] to be completed. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::{Arc, Completion}; +/// use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem}; +/// +/// #[pin_data] +/// struct MyTask { +/// #[pin] +/// work: Work<MyTask>, +/// #[pin] +/// done: Completion, +/// } +/// +/// impl_has_work! 
{ +/// impl HasWork<Self> for MyTask { self.work } +/// } +/// +/// impl MyTask { +/// fn new() -> Result<Arc<Self>> { +/// let this = Arc::pin_init(pin_init!(MyTask { +/// work <- new_work!("MyTask::work"), +/// done <- Completion::new(), +/// }), GFP_KERNEL)?; +/// +/// let _ = workqueue::system().enqueue(this.clone()); +/// +/// Ok(this) +/// } +/// +/// fn wait_for_completion(&self) { +/// self.done.wait_for_completion(); +/// +/// pr_info!("Completion: task complete\n"); +/// } +/// } +/// +/// impl WorkItem for MyTask { +/// type Pointer = Arc<MyTask>; +/// +/// fn run(this: Arc<MyTask>) { +/// // process this task +/// this.done.complete_all(); +/// } +/// } +/// +/// let task = MyTask::new()?; +/// task.wait_for_completion(); +/// # Ok::<(), Error>(()) +/// ``` +#[pin_data] +pub struct Completion { + #[pin] + inner: Opaque<bindings::completion>, +} + +// SAFETY: `Completion` is safe to be send to any task. +unsafe impl Send for Completion {} + +// SAFETY: `Completion` is safe to be accessed concurrently. +unsafe impl Sync for Completion {} + +impl Completion { + /// Create an initializer for a new [`Completion`]. + pub fn new() -> impl PinInit<Self> { + pin_init!(Self { + inner <- Opaque::ffi_init(|slot: *mut bindings::completion| { + // SAFETY: `slot` is a valid pointer to an uninitialized `struct completion`. + unsafe { bindings::init_completion(slot) }; + }), + }) + } + + fn as_raw(&self) -> *mut bindings::completion { + self.inner.get() + } + + /// Signal all tasks waiting on this completion. + /// + /// This method wakes up all tasks waiting on this completion; after this operation the + /// completion is permanently done, i.e. signals all current and future waiters. + pub fn complete_all(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::complete_all(self.as_raw()) }; + } + + /// Wait for completion of a task. 
+ /// + /// This method waits for the completion of a task; it is not interruptible and there is no + /// timeout. + /// + /// See also [`Completion::complete_all`]. + pub fn wait_for_completion(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::wait_for_completion(self.as_raw()) }; + } +} diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c index 36d2cd933f5a..c94254b77fc9 100644 --- a/samples/damon/mtier.c +++ b/samples/damon/mtier.c @@ -164,8 +164,12 @@ static int damon_sample_mtier_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_mtier_start(); + if (enable) { + err = damon_sample_mtier_start(); + if (err) + enable = false; + return err; + } damon_sample_mtier_stop(); return 0; } diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c index 056b1b21a0fe..5597e6a08ab2 100644 --- a/samples/damon/prcl.c +++ b/samples/damon/prcl.c @@ -122,8 +122,12 @@ static int damon_sample_prcl_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_prcl_start(); + if (enable) { + err = damon_sample_prcl_start(); + if (err) + enable = false; + return err; + } damon_sample_prcl_stop(); return 0; } diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c index 11be25803274..e20238a249e7 100644 --- a/samples/damon/wsse.c +++ b/samples/damon/wsse.c @@ -102,8 +102,12 @@ static int damon_sample_wsse_enable_store( if (enable == enabled) return 0; - if (enable) - return damon_sample_wsse_start(); + if (enable) { + err = damon_sample_wsse_start(); + if (err) + enable = false; + return err; + } damon_sample_wsse_stop(); return 0; } diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index fd6bd69c5096..f795302ddfa8 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -20,6 +20,7 @@ #include <linux/of_fdt.h> #include <linux/page_ext.h> #include <linux/radix-tree.h> +#include <linux/maple_tree.h> #include 
<linux/slab.h> #include <linux/threads.h> #include <linux/vmalloc.h> @@ -93,6 +94,12 @@ LX_GDBPARSED(RADIX_TREE_MAP_SIZE) LX_GDBPARSED(RADIX_TREE_MAP_SHIFT) LX_GDBPARSED(RADIX_TREE_MAP_MASK) +/* linux/maple_tree.h */ +LX_VALUE(MAPLE_NODE_SLOTS) +LX_VALUE(MAPLE_RANGE64_SLOTS) +LX_VALUE(MAPLE_ARANGE64_SLOTS) +LX_GDBPARSED(MAPLE_NODE_MASK) + /* linux/vmalloc.h */ LX_VALUE(VM_IOREMAP) LX_VALUE(VM_ALLOC) diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py index 616a5f26377a..f4f715a8f0e3 100644 --- a/scripts/gdb/linux/interrupts.py +++ b/scripts/gdb/linux/interrupts.py @@ -7,7 +7,7 @@ import gdb from linux import constants from linux import cpus from linux import utils -from linux import radixtree +from linux import mapletree irq_desc_type = utils.CachedType("struct irq_desc") @@ -23,12 +23,12 @@ def irqd_is_level(desc): def show_irq_desc(prec, irq): text = "" - desc = radixtree.lookup(gdb.parse_and_eval("&irq_desc_tree"), irq) + desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq) if desc is None: return text - desc = desc.cast(irq_desc_type.get_type()) - if desc is None: + desc = desc.cast(irq_desc_type.get_type().pointer()) + if desc == 0: return text if irq_settings_is_hidden(desc): @@ -110,7 +110,7 @@ def x86_show_mce(prec, var, pfx, desc): pvar = gdb.parse_and_eval(var) text = "%*s: " % (prec, pfx) for cpu in cpus.each_online_cpu(): - text += "%10u " % (cpus.per_cpu(pvar, cpu)) + text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference()) text += " %s\n" % (desc) return text @@ -142,7 +142,7 @@ def x86_show_interupts(prec): if constants.LX_CONFIG_X86_MCE: text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") - text == x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") text += show_irq_err_count(prec) @@ -221,8 +221,8 @@ class LxInterruptList(gdb.Command): gdb.write("CPU%-8d" % cpu) gdb.write("\n") - 
if utils.gdb_eval_or_none("&irq_desc_tree") is None: - return + if utils.gdb_eval_or_none("&sparse_irqs") is None: + raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?") for irq in range(nr_irqs): gdb.write(show_irq_desc(prec, irq)) diff --git a/scripts/gdb/linux/mapletree.py b/scripts/gdb/linux/mapletree.py new file mode 100644 index 000000000000..d52d51c0a03f --- /dev/null +++ b/scripts/gdb/linux/mapletree.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Maple tree helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants +from linux import xarray + +maple_tree_root_type = utils.CachedType("struct maple_tree") +maple_node_type = utils.CachedType("struct maple_node") +maple_enode_type = utils.CachedType("void") + +maple_dense = 0 +maple_leaf_64 = 1 +maple_range_64 = 2 +maple_arange_64 = 3 + +class Mas(object): + ma_active = 0 + ma_start = 1 + ma_root = 2 + ma_none = 3 + ma_pause = 4 + ma_overflow = 5 + ma_underflow = 6 + ma_error = 7 + + def __init__(self, mt, first, end): + if mt.type == maple_tree_root_type.get_type().pointer(): + self.tree = mt.dereference() + elif mt.type != maple_tree_root_type.get_type(): + raise gdb.GdbError("must be {} not {}" + .format(maple_tree_root_type.get_type().pointer(), mt.type)) + self.tree = mt + self.index = first + self.last = end + self.node = None + self.status = self.ma_start + self.min = 0 + self.max = -1 + + def is_start(self): + # mas_is_start() + return self.status == self.ma_start + + def is_ptr(self): + # mas_is_ptr() + return self.status == self.ma_root + + def is_none(self): + # mas_is_none() + return self.status == self.ma_none + + def root(self): + # mas_root() + return self.tree['ma_root'].cast(maple_enode_type.get_type().pointer()) + + def start(self): + # mas_start() + if self.is_start() is False: + return None + + self.min = 0 + self.max = ~0 
+ + while True: + self.depth = 0 + root = self.root() + if xarray.xa_is_node(root): + self.depth = 0 + self.status = self.ma_active + self.node = mte_safe_root(root) + self.offset = 0 + if mte_dead_node(self.node) is True: + continue + + return None + + self.node = None + # Empty tree + if root is None: + self.status = self.ma_none + self.offset = constants.LX_MAPLE_NODE_SLOTS + return None + + # Single entry tree + self.status = self.ma_root + self.offset = constants.LX_MAPLE_NODE_SLOTS + + if self.index != 0: + return None + + return root + + return None + + def reset(self): + # mas_reset() + self.status = self.ma_start + self.node = None + +def mte_safe_root(node): + if node.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_safe_root.__name__, maple_enode_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + indirect_ptr = node.cast(ulong_type) & ~0x2 + val = indirect_ptr.cast(maple_enode_type.get_type().pointer()) + return val + +def mte_node_type(entry): + ulong_type = utils.get_ulong_type() + val = None + if entry.type == maple_enode_type.get_type().pointer(): + val = entry.cast(ulong_type) + elif entry.type == ulong_type: + val = entry + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_node_type.__name__, maple_enode_type.get_type().pointer(), entry.type)) + return (val >> 0x3) & 0xf + +def ma_dead_node(node): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(ma_dead_node.__name__, maple_node_type.get_type().pointer(), node.type)) + ulong_type = utils.get_ulong_type() + parent = node['parent'] + indirect_ptr = node['parent'].cast(ulong_type) & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr == node + +def mte_to_node(enode): + ulong_type = utils.get_ulong_type() + if enode.type == maple_enode_type.get_type().pointer(): + indirect_ptr = enode.cast(ulong_type) + elif enode.type == ulong_type: + 
indirect_ptr = enode + else: + raise gdb.GdbError("{} must be {} not {}" + .format(mte_to_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + indirect_ptr = indirect_ptr & ~constants.LX_MAPLE_NODE_MASK + return indirect_ptr.cast(maple_node_type.get_type().pointer()) + +def mte_dead_node(enode): + if enode.type != maple_enode_type.get_type().pointer(): + raise gdb.GdbError("{} must be {} not {}" + .format(mte_dead_node.__name__, maple_enode_type.get_type().pointer(), enode.type)) + node = mte_to_node(enode) + return ma_dead_node(node) + +def ma_is_leaf(tp): + result = tp < maple_range_64 + return tp < maple_range_64 + +def mt_pivots(t): + if t == maple_dense: + return 0 + elif t == maple_leaf_64 or t == maple_range_64: + return constants.LX_MAPLE_RANGE64_SLOTS - 1 + elif t == maple_arange_64: + return constants.LX_MAPLE_ARANGE64_SLOTS - 1 + +def ma_pivots(node, t): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_pivots.__name__, maple_node_type.get_type().pointer(), node.type)) + if t == maple_arange_64: + return node['ma64']['pivot'] + elif t == maple_leaf_64 or t == maple_range_64: + return node['mr64']['pivot'] + else: + return None + +def ma_slots(node, tp): + if node.type != maple_node_type.get_type().pointer(): + raise gdb.GdbError("{}: must be {} not {}" + .format(ma_slots.__name__, maple_node_type.get_type().pointer(), node.type)) + if tp == maple_arange_64: + return node['ma64']['slot'] + elif tp == maple_range_64 or tp == maple_leaf_64: + return node['mr64']['slot'] + elif tp == maple_dense: + return node['slot'] + else: + return None + +def mt_slot(mt, slots, offset): + ulong_type = utils.get_ulong_type() + return slots[offset].cast(ulong_type) + +def mtree_lookup_walk(mas): + ulong_type = utils.get_ulong_type() + n = mas.node + + while True: + node = mte_to_node(n) + tp = mte_node_type(n) + pivots = ma_pivots(node, tp) + end = mt_pivots(tp) + offset = 0 + while True: + if 
pivots[offset] >= mas.index: + break + if offset >= end: + break + offset += 1 + + slots = ma_slots(node, tp) + n = mt_slot(mas.tree, slots, offset) + if ma_dead_node(node) is True: + mas.reset() + return None + break + + if ma_is_leaf(tp) is True: + break + + return n + +def mtree_load(mt, index): + ulong_type = utils.get_ulong_type() + # MT_STATE(...) + mas = Mas(mt, index, index) + entry = None + + while True: + entry = mas.start() + if mas.is_none(): + return None + + if mas.is_ptr(): + if index != 0: + entry = None + return entry + + entry = mtree_lookup_walk(mas) + if entry is None and mas.is_start(): + continue + else: + break + + if xarray.xa_is_zero(entry): + return None + + return entry diff --git a/scripts/gdb/linux/vfs.py b/scripts/gdb/linux/vfs.py index c77b9ce75f6d..9e921b645a68 100644 --- a/scripts/gdb/linux/vfs.py +++ b/scripts/gdb/linux/vfs.py @@ -22,7 +22,7 @@ def dentry_name(d): if parent == d or parent == 0: return "" p = dentry_name(d['d_parent']) + "/" - return p + d['d_iname'].string() + return p + d['d_name']['name'].string() class DentryName(gdb.Function): """Return string of the full path of a dentry. 
diff --git a/scripts/gdb/linux/xarray.py b/scripts/gdb/linux/xarray.py new file mode 100644 index 000000000000..f4477b5def75 --- /dev/null +++ b/scripts/gdb/linux/xarray.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Xarray helpers +# +# Copyright (c) 2025 Broadcom +# +# Authors: +# Florian Fainelli <florian.fainelli@broadcom.com> + +import gdb + +from linux import utils +from linux import constants + +def xa_is_internal(entry): + ulong_type = utils.get_ulong_type() + return ((entry.cast(ulong_type) & 3) == 2) + +def xa_mk_internal(v): + return ((v << 2) | 2) + +def xa_is_zero(entry): + ulong_type = utils.get_ulong_type() + return entry.cast(ulong_type) == xa_mk_internal(257) + +def xa_is_node(entry): + ulong_type = utils.get_ulong_type() + return xa_is_internal(entry) and (entry.cast(ulong_type) > 4096) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7becf3808818..d185754c2786 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1909,11 +1909,17 @@ retry: goto out_unlock; } /* Obtain the sid for the context. 
*/ - rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); - if (rc == -ESTALE) { - rcu_read_unlock(); - context_destroy(&newcontext); - goto retry; + if (context_equal(scontext, &newcontext)) + *out_sid = ssid; + else if (context_equal(tcontext, &newcontext)) + *out_sid = tsid; + else { + rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + context_destroy(&newcontext); + goto retry; + } } out_unlock: rcu_read_unlock(); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90ec4ef1b082..61d56b0c2be1 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -94,7 +94,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_len = str_len; + ctx->ctx_len = str_len + 1; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; rc = security_context_to_sid(ctx->ctx_str, str_len, diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 99006dc4777e..5c9e2d41d900 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -98,7 +98,7 @@ static int snd_card_ad1816a_pnp(int dev, struct pnp_card_link *card, pdev = pnp_request_card_device(card, id->devs[1].id, NULL); if (pdev == NULL) { mpu_port[dev] = -1; - dev_warn(&pdev->dev, "MPU401 device busy, skipping.\n"); + pr_warn("MPU401 device busy, skipping.\n"); return 0; } diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c index 74db11525003..5a083eecaa6b 100644 --- a/sound/isa/sb/sb16_main.c +++ b/sound/isa/sb/sb16_main.c @@ -703,6 +703,9 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, struct snd_ct unsigned char nval, oval; int change; + if (chip->mode & (SB_MODE_PLAYBACK | SB_MODE_CAPTURE)) + return -EBUSY; + nval = ucontrol->value.enumerated.item[0]; if (nval > 2) return -EINVAL; @@ -711,6 +714,10 @@ static int snd_sb16_dma_control_put(struct snd_kcontrol *kcontrol, 
struct snd_ct change = nval != oval; snd_sb16_set_dma_mode(chip, nval); spin_unlock_irqrestore(&chip->reg_lock, flags); + if (change) { + snd_dma_disable(chip->dma8); + snd_dma_disable(chip->dma16); + } return change; } diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c index 713d36ea40cb..d8dd84d41c87 100644 --- a/sound/pci/ctxfi/xfi.c +++ b/sound/pci/ctxfi/xfi.c @@ -98,8 +98,8 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) if (err < 0) goto error; - strcpy(card->driver, "SB-XFi"); - strcpy(card->shortname, "Creative X-Fi"); + strscpy(card->driver, "SB-XFi"); + strscpy(card->shortname, "Creative X-Fi"); snprintf(card->longname, sizeof(card->longname), "%s %s %s", card->shortname, atc->chip_name, atc->model_name); diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e5210ed48ddf..439cf1bda6e6 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2283,6 +2283,8 @@ static const struct snd_pci_quirk power_save_denylist[] = { SND_PCI_QUIRK(0x1734, 0x1232, "KONTRON SinglePC", 0), /* Dell ALC3271 */ SND_PCI_QUIRK(0x1028, 0x0962, "Dell ALC3271", 0), + /* https://bugzilla.kernel.org/show_bug.cgi?id=220210 */ + SND_PCI_QUIRK(0x17aa, 0x5079, "Lenovo Thinkpad E15", 0), {} }; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 08308231b4ed..9a7793eb16e9 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4551,7 +4551,9 @@ HDA_CODEC_ENTRY(0x10de002e, "Tegra186 HDMI/DP1", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de002f, "Tegra194 HDMI/DP2", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0030, "Tegra194 HDMI/DP3", patch_tegra_hdmi), HDA_CODEC_ENTRY(0x10de0031, "Tegra234 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0033, "SoC 33 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0034, "Tegra264 HDMI/DP", patch_tegra234_hdmi), +HDA_CODEC_ENTRY(0x10de0035, "SoC 35 HDMI/DP", patch_tegra234_hdmi), HDA_CODEC_ENTRY(0x10de0040, "GPU 40 HDMI/DP", patch_nvhdmi), 
HDA_CODEC_ENTRY(0x10de0041, "GPU 41 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0042, "GPU 42 HDMI/DP", patch_nvhdmi), @@ -4590,15 +4592,32 @@ HDA_CODEC_ENTRY(0x10de0097, "GPU 97 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0098, "GPU 98 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de0099, "GPU 99 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009a, "GPU 9a HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009b, "GPU 9b HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de009c, "GPU 9c HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009d, "GPU 9d HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009e, "GPU 9e HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de009f, "GPU 9f HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a0, "GPU a0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a1, "GPU a1 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a3, "GPU a3 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a4, "GPU a4 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a5, "GPU a5 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a6, "GPU a6 HDMI/DP", patch_nvhdmi), HDA_CODEC_ENTRY(0x10de00a7, "GPU a7 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a8, "GPU a8 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00a9, "GPU a9 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00aa, "GPU aa HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ab, "GPU ab HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ad, "GPU ad HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00ae, "GPU ae HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00af, "GPU af HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b0, "GPU b0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00b1, "GPU b1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c0, "GPU c0 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c1, "GPU c1 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c3, "GPU c3 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c4, "GPU c4 HDMI/DP", patch_nvhdmi), +HDA_CODEC_ENTRY(0x10de00c5, "GPU c5 HDMI/DP", patch_nvhdmi), 
HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x10de8067, "MCP67/68 HDMI", patch_nvhdmi_2ch), HDA_CODEC_ENTRY(0x67663d82, "Arise 82 HDMI/DP", patch_gf_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cd0d7ba7320e..060db37eab83 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2656,6 +2656,7 @@ static const struct hda_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x5802, "Clevo X58[05]WN[RST]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), @@ -6609,6 +6610,7 @@ static void alc294_fixup_bass_speaker_15(struct hda_codec *codec, if (action == HDA_FIXUP_ACT_PRE_PROBE) { static const hda_nid_t conn[] = { 0x02, 0x03 }; snd_hda_override_conn_list(codec, 0x15, ARRAY_SIZE(conn), conn); + snd_hda_gen_add_micmute_led_cdev(codec, NULL); } } @@ -8030,6 +8032,9 @@ enum { ALC294_FIXUP_ASUS_CS35L41_SPI_2, ALC274_FIXUP_HP_AIO_BIND_DACS, ALC287_FIXUP_PREDATOR_SPK_CS35L41_I2C_2, + ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC, + ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1, + ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -10414,6 +10419,26 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc274_fixup_hp_aio_bind_dacs, }, + [ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11050 }, + { 0x1b, 0x03a11c30 }, + { } + }, + .chained = true, + 
.chain_id = ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1 + }, + [ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_speaker2_to_dac1, + }, + [ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10509,6 +10534,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0872, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), + SND_PCI_QUIRK(0x1028, 0x0879, "Dell Latitude 5420 Rugged", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), @@ -10713,6 +10739,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), + SND_PCI_QUIRK(0x103c, 0x898a, "HP Pavilion 15-eg100", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8991, "HP EliteBook 845 G9", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), @@ -10787,6 +10814,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 
0x8bb3, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bb4, "HP Slim OMEN", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8bc8, "HP Victus 15-fa1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bcd, "HP Omen 16-xd0xxx", ALC245_FIXUP_HP_MUTE_LED_V1_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8bdd, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8bde, "HP Envy 17", ALC287_FIXUP_CS35L41_I2C_2), @@ -10840,6 +10868,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -10852,6 +10881,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d07, "HP Victus 15-fb2xxx (MB 8D07)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10881,7 +10911,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", 
ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfb, "HP EliteBook 6 G1a 14", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8dfd, "HP EliteBook 6 G1a 16", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e12, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), @@ -10904,6 +10936,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x1054, "ASUS G614FH/FM/FP", ALC287_FIXUP_CS35L41_I2C_2), @@ -10932,6 +10966,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1314, "ASUS GA605K", ALC285_FIXUP_ASUS_GA605K_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), @@ 
-10997,6 +11032,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606WA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1264, "ASUS UM5606KA", ALC294_FIXUP_BASS_SPEAKER_15), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x1e10, "ASUS VivoBook X507UAR", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e1f, "ASUS Vivobook 15 X1504VAP", ALC2XX_FIXUP_HEADSET_MIC), @@ -11005,6 +11041,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1e93, "ASUS ExpertBook B9403CVAR", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1eb3, "ASUS Ally RCLA72", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1ed3, "ASUS HN7306W", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), @@ -11106,6 +11143,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x35a1, "Clevo V3[56]0EN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x35b1, "Clevo V3[57]0WN[MNP]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), 
SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11133,6 +11172,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5700, "Clevo X560WN[RST]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11172,6 +11212,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa741, "Clevo V54x_6x_TNE", ALC245_FIXUP_CLEVO_NOISY_MIC), + SND_PCI_QUIRK(0x1558, 0xa743, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xa763, "Clevo V54x_6x_TU", ALC245_FIXUP_CLEVO_NOISY_MIC), SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -11384,6 +11425,8 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1407, "Positivo P15X", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), + SND_PCI_QUIRK(0x2782, 0x1409, "Positivo K116J", ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC), 
SND_PCI_QUIRK(0x2782, 0x1701, "Infinix Y4 Max", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1705, "MEDION E15433", ALC269VC_FIXUP_INFINIX_Y4_MAX), SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), diff --git a/sound/pci/hda/tas2781_hda.c b/sound/pci/hda/tas2781_hda.c index 5f1d4b3e9688..34217ce9f28e 100644 --- a/sound/pci/hda/tas2781_hda.c +++ b/sound/pci/hda/tas2781_hda.c @@ -44,7 +44,7 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) TASDEVICE_REG(0, 0x13, 0x70), TASDEVICE_REG(0, 0x18, 0x7c), }; - unsigned int crc, oft; + unsigned int crc, oft, node_num; unsigned char *buf; int i, j, k, l; @@ -80,8 +80,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) dev_err(p->dev, "%s: CRC error\n", __func__); return; } + node_num = tmp_val[1]; - for (j = 0, k = 0; j < tmp_val[1]; j++) { + for (j = 0, k = 0; j < node_num; j++) { oft = j * 6 + 3; if (tmp_val[oft] == TASDEV_UEFI_CALI_REG_ADDR_FLG) { for (i = 0; i < TASDEV_CALIB_N; i++) { @@ -99,8 +100,9 @@ static void tas2781_apply_calib(struct tasdevice_priv *p) } data[l] = k; + oft++; for (i = 0; i < TASDEV_CALIB_N * 4; i++) - data[l + i] = data[4 * oft + i]; + data[l + i + 1] = data[4 * oft + i]; k++; } } diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index 85feae45c44c..d7c994e26e4d 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -334,6 +334,8 @@ struct acp_hw_ops { * @addr: pci ioremap address * @reg_range: ACP reigister range * @acp_rev: ACP PCI revision id + * @acp_sw_pad_keeper_en: store acp SoundWire pad keeper enable register value + * @acp_pad_pulldown_ctrl: store acp pad pulldown control register value * @acp63_sdw0-dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire * manager-SW0 instance * @acp63_sdw_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire @@ -367,6 +369,8 @@ struct acp63_dev_data { u32 addr; u32 reg_range; u32 acp_rev; + u32 acp_sw_pad_keeper_en; + u32 
acp_pad_pulldown_ctrl; u16 acp63_sdw0_dma_intr_stat[ACP63_SDW0_DMA_MAX_STREAMS]; u16 acp63_sdw1_dma_intr_stat[ACP63_SDW1_DMA_MAX_STREAMS]; u16 acp70_sdw0_dma_intr_stat[ACP70_SDW0_DMA_MAX_STREAMS]; diff --git a/sound/soc/amd/ps/ps-common.c b/sound/soc/amd/ps/ps-common.c index 1c89fb5fe1da..7b4966b75dc6 100644 --- a/sound/soc/amd/ps/ps-common.c +++ b/sound/soc/amd/ps/ps-common.c @@ -160,6 +160,8 @@ static int __maybe_unused snd_acp63_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -197,6 +199,7 @@ static int __maybe_unused snd_acp63_runtime_resume(struct device *dev) static int __maybe_unused snd_acp63_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -209,6 +212,12 @@ static int __maybe_unused snd_acp63_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } @@ -408,6 +417,8 @@ static int __maybe_unused snd_acp70_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ 
-445,6 +456,7 @@ static int __maybe_unused snd_acp70_runtime_resume(struct device *dev) static int __maybe_unused snd_acp70_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -459,6 +471,12 @@ static int __maybe_unused snd_acp70_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 7e62445e02c1..97e340140d0c 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -356,6 +356,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_PRODUCT_NAME, "Nitro ANV15-41"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "83J2"), } @@ -363,6 +370,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83J3"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } @@ -454,6 +468,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { { .driver_data = &acp6x_card, .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 17 D7VF"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, 
"Alienware m17 R5 AMD"), } @@ -518,6 +539,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "Victus by HP Gaming Laptop 15-fb2xxx"), + } + }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), DMI_MATCH(DMI_BOARD_NAME, "8A42"), } }, diff --git a/sound/soc/apple/Kconfig b/sound/soc/apple/Kconfig index 793f7782e0d7..e9c777cdb6e3 100644 --- a/sound/soc/apple/Kconfig +++ b/sound/soc/apple/Kconfig @@ -2,7 +2,6 @@ config SND_SOC_APPLE_MCA tristate "Apple Silicon MCA driver" depends on ARCH_APPLE || COMPILE_TEST select SND_DMAENGINE_PCM - default ARCH_APPLE help This option enables an ASoC platform driver for MCA peripherals found on Apple Silicon SoCs. diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index 13f602f51bf3..fa9693af3722 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -238,16 +238,15 @@ static const struct regmap_bus cs35l56_regmap_bus_sdw = { .val_format_endian_default = REGMAP_ENDIAN_BIG, }; -static int cs35l56_sdw_set_cal_index(struct cs35l56_private *cs35l56) +static int cs35l56_sdw_get_unique_id(struct cs35l56_private *cs35l56) { int ret; - /* SoundWire UniqueId is used to index the calibration array */ ret = sdw_read_no_pm(cs35l56->sdw_peripheral, SDW_SCP_DEVID_0); if (ret < 0) return ret; - cs35l56->base.cal_index = ret & 0xf; + cs35l56->sdw_unique_id = ret & 0xf; return 0; } @@ -259,11 +258,13 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral) pm_runtime_get_noresume(cs35l56->base.dev); - if (cs35l56->base.cal_index < 0) { - ret = cs35l56_sdw_set_cal_index(cs35l56); - if (ret < 0) - goto out; - } + ret = cs35l56_sdw_get_unique_id(cs35l56); + if (ret) + goto out; + + /* SoundWire UniqueId is used to index the calibration array */ + if (cs35l56->base.cal_index < 0) + cs35l56->base.cal_index = cs35l56->sdw_unique_id; ret = 
cs35l56_init(cs35l56); if (ret < 0) { @@ -587,6 +588,7 @@ static int cs35l56_sdw_probe(struct sdw_slave *peripheral, const struct sdw_devi cs35l56->base.dev = dev; cs35l56->sdw_peripheral = peripheral; + cs35l56->sdw_link_num = peripheral->bus->link_id; INIT_WORK(&cs35l56->sdw_irq_work, cs35l56_sdw_irq_work); dev_set_drvdata(dev, cs35l56); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index d0831d609584..ba653f6ccfae 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -980,7 +980,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) break; default: dev_err(cs35l56_base->dev, "Unknown device %x\n", devid); - return ret; + return -ENODEV; } cs35l56_base->type = devid & 0xFF; diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index c78e4746e428..1b42586794ad 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -706,17 +706,41 @@ static int cs35l56_write_cal(struct cs35l56_private *cs35l56) return ret; } -static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) +static int cs35l56_dsp_download_and_power_up(struct cs35l56_private *cs35l56, + bool load_firmware) { int ret; - /* Use wm_adsp to load and apply the firmware patch and coefficient files */ - ret = wm_adsp_power_up(&cs35l56->dsp, true); + /* + * Abort the first load if it didn't find the suffixed bins and + * we have an alternate fallback suffix. 
+ */ + cs35l56->dsp.bin_mandatory = (load_firmware && cs35l56->fallback_fw_suffix); + + ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware); + if ((ret == -ENOENT) && cs35l56->dsp.bin_mandatory) { + cs35l56->dsp.fwf_suffix = cs35l56->fallback_fw_suffix; + cs35l56->fallback_fw_suffix = NULL; + cs35l56->dsp.bin_mandatory = false; + ret = wm_adsp_power_up(&cs35l56->dsp, load_firmware); + } + if (ret) { - dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret); - return; + dev_dbg(cs35l56->base.dev, "wm_adsp_power_up ret %d\n", ret); + return ret; } + return 0; +} + +static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) +{ + int ret; + + ret = cs35l56_dsp_download_and_power_up(cs35l56, true); + if (ret) + return; + cs35l56_write_cal(cs35l56); /* Always REINIT after applying patch or coefficients */ @@ -750,11 +774,9 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing * but only if firmware is missing. If firmware is already patched just * power-up wm_adsp without downloading firmware. */ - ret = wm_adsp_power_up(&cs35l56->dsp, !!firmware_missing); - if (ret) { - dev_dbg(cs35l56->base.dev, "%s: wm_adsp_power_up ret %d\n", __func__, ret); + ret = cs35l56_dsp_download_and_power_up(cs35l56, firmware_missing); + if (ret) goto err; - } mutex_lock(&cs35l56->base.irq_lock); @@ -853,6 +875,34 @@ err: pm_runtime_put_autosuspend(cs35l56->base.dev); } +static int cs35l56_set_fw_suffix(struct cs35l56_private *cs35l56) +{ + if (cs35l56->dsp.fwf_suffix) + return 0; + + if (!cs35l56->sdw_peripheral) + return 0; + + cs35l56->dsp.fwf_suffix = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, + "l%uu%u", + cs35l56->sdw_link_num, + cs35l56->sdw_unique_id); + if (!cs35l56->dsp.fwf_suffix) + return -ENOMEM; + + /* + * There are published firmware files for L56 B0 silicon using + * the ALSA prefix as the filename suffix. Default to trying these + * first, with the new name as an alternate. 
+ */ + if ((cs35l56->base.type == 0x56) && (cs35l56->base.rev == 0xb0)) { + cs35l56->fallback_fw_suffix = cs35l56->dsp.fwf_suffix; + cs35l56->dsp.fwf_suffix = cs35l56->component->name_prefix; + } + + return 0; +} + static int cs35l56_component_probe(struct snd_soc_component *component) { struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); @@ -892,6 +942,10 @@ static int cs35l56_component_probe(struct snd_soc_component *component) return -ENOMEM; cs35l56->component = component; + ret = cs35l56_set_fw_suffix(cs35l56); + if (ret) + return ret; + wm_adsp2_component_probe(&cs35l56->dsp, component); debugfs_create_bool("init_done", 0444, debugfs_root, &cs35l56->base.init_done); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 200f695efca3..bd77a57249d7 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -38,6 +38,7 @@ struct cs35l56_private { struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; struct sdw_slave *sdw_peripheral; + const char *fallback_fw_suffix; struct work_struct sdw_irq_work; bool sdw_irq_no_unmask; bool soft_resetting; @@ -52,6 +53,8 @@ struct cs35l56_private { bool tdm_mode; bool sysclk_set; u8 old_sdw_clock_scale; + u8 sdw_link_num; + u8 sdw_unique_id; }; extern const struct dev_pm_ops cs35l56_pm_ops_i2c_spi; diff --git a/sound/soc/codecs/cs48l32.c b/sound/soc/codecs/cs48l32.c index 90a795230d27..9bdd48aab42a 100644 --- a/sound/soc/codecs/cs48l32.c +++ b/sound/soc/codecs/cs48l32.c @@ -2162,6 +2162,10 @@ static int cs48l32_hw_params(struct snd_pcm_substream *substream, n_slots_multiple = 1; sclk_target = snd_soc_tdm_params_to_bclk(params, slotw, n_slots, n_slots_multiple); + if (sclk_target < 0) { + cs48l32_asp_err(dai, "Invalid parameters\n"); + return sclk_target; + } for (i = 0; i < ARRAY_SIZE(cs48l32_sclk_rates); i++) { if ((cs48l32_sclk_rates[i].freq >= sclk_target) && diff --git a/sound/soc/codecs/es8326.c 
b/sound/soc/codecs/es8326.c index 066d92b54312..78c4e68f6002 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -1079,8 +1079,7 @@ static void es8326_init(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk); - regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, - (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); + regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT); regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT); regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index 1c9f32e405cf..ba080957e933 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -430,6 +430,7 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, unsigned int read_l, read_r, ctl_l = 0, ctl_r = 0; unsigned int adc_vol_flag = 0; const unsigned int interval_offset = 0xc0; + const unsigned int tendA = 0x200; const unsigned int tendB = 0xa00; if (strstr(ucontrol->id.name, "FU1E Capture Volume") || @@ -439,9 +440,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, regmap_read(rt721->mbq_regmap, mc->reg, &read_l); regmap_read(rt721->mbq_regmap, mc->rreg, &read_r); - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_l = read_l / tendB; - else { + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_l == 0x8000 || read_l == 0xfe00) + ctl_l = 0; + else + ctl_l = read_l / tendA + 1; + } else { if (adc_vol_flag) ctl_l = mc->max - (((0x1e00 - read_l) & 0xffff) / interval_offset); else @@ -449,9 +457,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, } if (read_l != read_r) { - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_r = read_r / tendB; - else { /* ADC/DAC gain */ + } else if (mc->shift == 1) { 
+ /* FU33 boost gain */ + if (read_r == 0x8000 || read_r == 0xfe00) + ctl_r = 0; + else + ctl_r = read_r / tendA + 1; + } else { /* ADC/DAC gain */ if (adc_vol_flag) ctl_r = mc->max - (((0x1e00 - read_r) & 0xffff) / interval_offset); else diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 3c580faab3b7..8a1d5cc75d6c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -783,16 +783,19 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, char **coeff_filename) { const char *system_name = dsp->system_name; - const char *asoc_component_prefix = dsp->component->name_prefix; + const char *suffix = dsp->component->name_prefix; int ret = 0; - if (system_name && asoc_component_prefix) { + if (dsp->fwf_suffix) + suffix = dsp->fwf_suffix; + + if (system_name && suffix) { if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, system_name, - asoc_component_prefix, "wmfw")) { + suffix, "wmfw")) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); return 0; } } @@ -801,10 +804,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, system_name, NULL, "wmfw")) { - if (asoc_component_prefix) + if (suffix) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); if (!*coeff_firmware) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -816,10 +819,10 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, /* Check system-specific bin without wmfw before falling back to generic */ if (dsp->wmfw_optional && system_name) { - if (asoc_component_prefix) + if (suffix) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, system_name, - asoc_component_prefix, "bin"); + suffix, "bin"); 
if (!*coeff_firmware) wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -850,7 +853,7 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", cirrus_dir, dsp->part, dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, - wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); + wm_adsp_fw[dsp->fw].file, system_name, suffix); return -ENOENT; } @@ -997,11 +1000,17 @@ int wm_adsp_power_up(struct wm_adsp *dsp, bool load_firmware) return ret; } + if (dsp->bin_mandatory && !coeff_firmware) { + ret = -ENOENT; + goto err; + } + ret = cs_dsp_power_up(&dsp->cs_dsp, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename, wm_adsp_fw_text[dsp->fw]); +err: wm_adsp_release_firmware_files(dsp, wmfw_firmware, wmfw_filename, coeff_firmware, coeff_filename); diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index edc5b02ae765..25210d404bf1 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -29,12 +29,14 @@ struct wm_adsp { const char *part; const char *fwf_name; const char *system_name; + const char *fwf_suffix; struct snd_soc_component *component; unsigned int sys_config_size; int fw; bool wmfw_optional; + bool bin_mandatory; struct work_struct boot_work; int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 677529916dc0..745532ccbdba 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -517,7 +517,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) regmap_update_bits(asrc->regmap, REG_ASRCTR, ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index)); regmap_update_bits(asrc->regmap, REG_ASRCTR, - ASRCTR_USRi_MASK(index), 0); + ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index), + ASRCTR_USR(index)); /* Set the input and output clock sources */ regmap_update_bits(asrc->regmap, REG_ASRCSR, diff --git 
a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index af1a168d35e3..50af6b725670 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -803,13 +803,15 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) * anymore. Add software reset to fix this issue. * This is a hardware bug, and will be fix in the * next sai version. + * + * In consumer mode, this can happen even after a + * single open/close, especially if both tx and rx + * are running concurrently. */ - if (!sai->is_consumer_mode[tx]) { - /* Software Reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); - /* Clear SR bit to finish the reset */ - regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); - } + /* Software Reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), FSL_SAI_CSR_SR); + /* Clear SR bit to finish the reset */ + regmap_write(sai->regmap, FSL_SAI_xCSR(tx, ofs), 0); } static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 2df7afa2f469..128b6876af83 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -42,6 +42,7 @@ config SND_SOC_INTEL_SOF_NUVOTON_COMMON tristate config SND_SOC_INTEL_SOF_BOARD_HELPERS + select SND_SOC_ACPI_INTEL_MATCH tristate if SND_SOC_INTEL_CATPT diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 81a914bd7ec2..504887505e68 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -783,6 +783,9 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2347, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2348, "Lenovo P16", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x17aa, 0x2349, 
"Lenovo P1", SOC_SDW_CODEC_MIC), {} }; diff --git a/sound/soc/intel/common/soc-acpi-intel-arl-match.c b/sound/soc/intel/common/soc-acpi-intel-arl-match.c index 73e581e93755..1ad704ca2c5f 100644 --- a/sound/soc/intel/common/soc-acpi-intel-arl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-arl-match.c @@ -468,17 +468,17 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_arl_sdw_machines[] = { .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2), - .links = arl_cs42l43_l2, + .link_mask = BIT(2) | BIT(3), + .links = arl_cs42l43_l2_cs35l56_l3, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { - .link_mask = BIT(2) | BIT(3), - .links = arl_cs42l43_l2_cs35l56_l3, + .link_mask = BIT(2), + .links = arl_cs42l43_l2, .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-arl-cs42l43-l2-cs35l56-l3.tplg", + .sof_tplg_filename = "sof-arl-cs42l43-l2.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, { diff --git a/sound/soc/intel/common/sof-function-topology-lib.c b/sound/soc/intel/common/sof-function-topology-lib.c index 90fe7aa3df1c..3cc81dcf047e 100644 --- a/sound/soc/intel/common/sof-function-topology-lib.c +++ b/sound/soc/intel/common/sof-function-topology-lib.c @@ -73,7 +73,8 @@ int sof_sdw_get_tplg_files(struct snd_soc_card *card, const struct snd_soc_acpi_ break; default: dev_warn(card->dev, - "only -2ch and -4ch are supported for dmic\n"); + "unsupported number of dmics: %d\n", + mach_params.dmic_num); continue; } tplg_dev = TPLG_DEVICE_INTEL_PCH_DMIC; diff --git a/sound/soc/loongson/loongson_i2s.c b/sound/soc/loongson/loongson_i2s.c index e8852a30f213..e336656e13eb 100644 --- a/sound/soc/loongson/loongson_i2s.c +++ b/sound/soc/loongson/loongson_i2s.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/export.h> #include 
<linux/pm_runtime.h> #include <linux/dma-mapping.h> #include <sound/soc.h> diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index e86b4a03dd61..3d9ba13ee1e5 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -186,6 +186,7 @@ config SND_SOC_SM8250 tristate "SoC Machine driver for SM8250 boards" depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK + depends on SND_SOC_QCOM_OFFLOAD_UTILS || !SND_SOC_QCOM_OFFLOAD_UTILS select SND_SOC_QDSP6 select SND_SOC_QCOM_COMMON select SND_SOC_QCOM_SDW diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index b7060b746356..d75e7292240b 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -1205,6 +1205,8 @@ static int is_sdca_endpoint_present(struct device *dev, int i; dlc = kzalloc(sizeof(*dlc), GFP_KERNEL); + if (!dlc) + return -ENOMEM; adr_end = &adr_dev->endpoints[end_index]; dai_info = &codec_info->dais[adr_end->num]; diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c index a40a8047873e..b73dd91bd529 100644 --- a/sound/soc/sof/imx/imx8.c +++ b/sound/soc/sof/imx/imx8.c @@ -40,6 +40,19 @@ struct imx8m_chip_data { struct reset_control *run_stall; }; +static int imx8_shutdown(struct snd_sof_dev *sdev) +{ + /* + * Force the DSP to stall. After the firmware image is loaded, + * the stall will be removed during run() by a matching + * imx_sc_pm_cpu_start() call. + */ + imx_sc_pm_cpu_start(get_chip_pdata(sdev), IMX_SC_R_DSP, false, + RESET_VECTOR_VADDR); + + return 0; +} + /* * DSP control. 
*/ @@ -281,11 +294,13 @@ static int imx8_ops_init(struct snd_sof_dev *sdev) static const struct imx_chip_ops imx8_chip_ops = { .probe = imx8_probe, .core_kick = imx8_run, + .core_shutdown = imx8_shutdown, }; static const struct imx_chip_ops imx8x_chip_ops = { .probe = imx8_probe, .core_kick = imx8x_run, + .core_shutdown = imx8_shutdown, }; static const struct imx_chip_ops imx8m_chip_ops = { diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index bdfe388da198..3b47191ea7a5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1257,11 +1257,11 @@ static int check_tplg_quirk_mask(struct snd_soc_acpi_mach *mach) return 0; } -static char *remove_file_ext(const char *tplg_filename) +static char *remove_file_ext(struct device *dev, const char *tplg_filename) { char *filename, *tmp; - filename = kstrdup(tplg_filename, GFP_KERNEL); + filename = devm_kstrdup(dev, tplg_filename, GFP_KERNEL); if (!filename) return NULL; @@ -1345,7 +1345,7 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) */ if (!sof_pdata->tplg_filename) { /* remove file extension if it exists */ - tplg_filename = remove_file_ext(mach->sof_tplg_filename); + tplg_filename = remove_file_ext(sdev->dev, mach->sof_tplg_filename); if (!tplg_filename) return NULL; diff --git a/sound/usb/format.c b/sound/usb/format.c index 8cd54f7bf33a..0ee532acbb60 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -310,16 +310,14 @@ static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, struct audioformat *fp, unsigned int rate) { - struct usb_interface *iface; struct usb_host_interface *alts; unsigned char *fmt; unsigned int max_rate; - iface = usb_ifnum_to_if(chip->dev, fp->iface); - if (!iface) + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) return true; - alts = &iface->altsetting[fp->altset_idx]; fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_FORMAT_TYPE); if (!fmt) @@ -328,20 +326,20 @@ 
static bool focusrite_valid_sample_rate(struct snd_usb_audio *chip, if (fmt[0] == 10) { /* bLength */ max_rate = combine_quad(&fmt[6]); - /* Validate max rate */ - if (max_rate != 48000 && - max_rate != 96000 && - max_rate != 192000 && - max_rate != 384000) { - + switch (max_rate) { + case 48000: + return (rate == 44100 || rate == 48000); + case 96000: + return (rate == 88200 || rate == 96000); + case 192000: + return (rate == 176400 || rate == 192000); + default: usb_audio_info(chip, "%u:%d : unexpected max rate: %u\n", fp->iface, fp->altsetting, max_rate); return true; } - - return rate <= max_rate; } return true; diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 0e9b5431a47f..faac7df1fbcf 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -383,6 +383,13 @@ static const struct usbmix_name_map ms_usb_link_map[] = { { 0 } /* terminator */ }; +/* KTMicro USB */ +static struct usbmix_name_map s31b2_0022_map[] = { + { 23, "Speaker Playback" }, + { 18, "Headphone Playback" }, + { 0 } +}; + /* ASUS ROG Zenith II with Realtek ALC1220-VB */ static const struct usbmix_name_map asus_zenith_ii_map[] = { { 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */ @@ -692,6 +699,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x045e, 0x083c), .map = ms_usb_link_map, }, + { + /* KTMicro USB */ + .id = USB_ID(0X31b2, 0x0022), + .map = s31b2_0022_map, + }, { 0 } /* terminator */ }; diff --git a/sound/usb/qcom/qc_audio_offload.c b/sound/usb/qcom/qc_audio_offload.c index 5bc27c82e0af..3543b5a53592 100644 --- a/sound/usb/qcom/qc_audio_offload.c +++ b/sound/usb/qcom/qc_audio_offload.c @@ -759,7 +759,7 @@ static void qmi_stop_session(void) subs = find_substream(pcm_card_num, info->pcm_dev_num, info->direction); if (!subs || !chip || atomic_read(&chip->shutdown)) { - dev_err(&subs->dev->dev, + dev_err(&uadev[idx].udev->dev, "no sub for c#%u dev#%u dir%u\n", info->pcm_card_num, info->pcm_dev_num, @@ -1360,20 
+1360,21 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, if (!uadev[card_num].ctrl_intf) { dev_err(&subs->dev->dev, "audio ctrl intf info not cached\n"); - ret = -ENODEV; - goto err; + return -ENODEV; } ret = uaudio_populate_uac_desc(subs, resp); if (ret < 0) - goto err; + return ret; resp->slot_id = subs->dev->slot_id; resp->slot_id_valid = 1; data = snd_soc_usb_find_priv_data(uaudio_qdev->auxdev->dev.parent); - if (!data) - goto err; + if (!data) { + dev_err(&subs->dev->dev, "No private data found\n"); + return -ENODEV; + } uaudio_qdev->data = data; @@ -1382,7 +1383,7 @@ static int prepare_qmi_response(struct snd_usb_substream *subs, &resp->xhci_mem_info.tr_data, &resp->std_as_data_ep_desc); if (ret < 0) - goto err; + return ret; resp->std_as_data_ep_desc_valid = 1; @@ -1500,7 +1501,6 @@ drop_data_ep: xhci_sideband_remove_endpoint(uadev[card_num].sb, usb_pipe_endpoint(subs->dev, subs->data_endpoint->pipe)); -err: return ret; } diff --git a/sound/usb/stream.c b/sound/usb/stream.c index c1ea8844a46f..aa91d63749f2 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -987,6 +987,8 @@ snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip, * and request Cluster Descriptor */ wLength = le16_to_cpu(hc_header.wLength); + if (wLength < sizeof(cluster)) + return NULL; cluster = kzalloc(wLength, GFP_KERNEL); if (!cluster) return ERR_PTR(-ENOMEM); diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index af9d9acaf997..ed5f3892674c 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -431,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define 
KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/tools/arch/loongarch/include/asm/orc_types.h +++ b/tools/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index 300b6e0765b2..cbce54fec7b9 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e02be2962a01..ee176236c2be 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -336,7 +336,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_IBRS_SAME_MODE 
(13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ @@ -379,6 +379,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ #define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -447,6 +448,7 @@ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ #define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ @@ -458,6 +460,7 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ @@ -482,7 +485,8 @@ #define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous 
Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ -#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -535,6 +539,8 @@ #define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ -#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */ -#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e7d2f460fcc6..5cfb5d74dd5f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,7 +533,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -550,16 +550,16 @@ #define HWP_LOWEST_PERF(x) 
(((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -602,7 +602,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ @@ -624,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index b663d916f162..6f3499507c5e 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -441,6 +441,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 
<< 9) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SNP_VM 4 #define KVM_X86_TDX_VM 5 +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-commend. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + __u64 reserved[254]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX + * module doesn't virtualize. 
+ */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) + +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index ec1321248dac..9c640a521a67 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 #define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -225,6 +226,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..f0f4a4cf84a7 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -34,6 +34,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -92,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_TDCALL 77 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -155,7 +157,8 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ - { EXIT_REASON_NOTIFY, "NOTIFY" } + { EXIT_REASON_NOTIFY, "NOTIFY" }, \ + { EXIT_REASON_TDCALL, "TDCALL" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff 
--git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf6f9065aa..ccc3d923fc1e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d66b710d628f..fb5a03cf5ab7 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -42,6 +42,7 @@ SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) +SYM_PIC_ALIAS(memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 14fd0ca9a6cd..7ad056219115 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,16 +20,68 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/overflow.h> + #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#else + +/* + * Generate a mask for the specified type @t. 
Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. 
*/ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d627e66a04a6..33411ca0cc90 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) 
((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - 
(h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6ae8ad8934b..d00b85cb168c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -375,6 +375,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -930,6 +931,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff 
--git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..0381f209920a 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -226,6 +226,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c641a2fb20..52e353368f58 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -597,7 +597,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -4259,7 +4259,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -9138,8 +9140,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 55b59f6c79b8..61deb5923067 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -231,14 +231,7 @@ class NlMsg: self.extack['unknown'].append(extack) if attr_space: - # We don't have the ability to parse nests yet, so only do global - if 'miss-type' in self.extack and 'miss-nest' not in self.extack: - miss_type = self.extack['miss-type'] - if miss_type in attr_space.attrs_by_val: - spec = attr_space.attrs_by_val[miss_type] - self.extack['miss-type'] = spec['name'] - if 'doc' in spec: - 
self.extack['miss-type-doc'] = spec['doc'] + self.annotate_extack(attr_space) def _decode_policy(self, raw): policy = {} @@ -264,6 +257,18 @@ class NlMsg: policy['mask'] = attr.as_scalar('u64') return policy + def annotate_extack(self, attr_space): + """ Make extack more human friendly with attribute information """ + + # We don't have the ability to parse nests yet, so only do global + if 'miss-type' in self.extack and 'miss-nest' not in self.extack: + miss_type = self.extack['miss-type'] + if miss_type in attr_space.attrs_by_val: + spec = attr_space.attrs_by_val[miss_type] + self.extack['miss-type'] = spec['name'] + if 'doc' in spec: + self.extack['miss-type-doc'] = spec['doc'] + def cmd(self): return self.nl_type @@ -277,12 +282,12 @@ class NlMsg: class NlMsgs: - def __init__(self, data, attr_space=None): + def __init__(self, data): self.msgs = [] offset = 0 while offset < len(data): - msg = NlMsg(data, offset, attr_space=attr_space) + msg = NlMsg(data, offset) offset += msg.nl_len self.msgs.append(msg) @@ -1034,12 +1039,13 @@ class YnlFamily(SpecFamily): op_rsp = [] while not done: reply = self.sock.recv(self._recv_size) - nms = NlMsgs(reply, attr_space=op.attr_set) + nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.nl_seq in reqs_by_seq: (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq] if nl_msg.extack: + nl_msg.annotate_extack(op.attr_set) self._decode_extack(req_msg, op, nl_msg.extack, vals) else: op = None diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f23bdda737aa..d967ac001498 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2318,6 +2318,7 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + type = bswap_if_needed(file->elf, type); offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); diff --git 
a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt index 55f80beae037..548549935760 100644 --- a/tools/perf/Documentation/perf-amd-ibs.txt +++ b/tools/perf/Documentation/perf-amd-ibs.txt @@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool. # perf mem report A normal perf mem report output will provide detailed memory access profile. -However, it can also be aggregated based on output fields. For example: - - # perf mem report -F mem,sample,snoop - Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876 - Memory access Samples Snoop - N/A 1903343 N/A - L1 hit 1056754 N/A - L2 hit 75231 N/A - L3 hit 9496 HitM - L3 hit 2270 N/A - RAM hit 8710 N/A - Remote node, same socket RAM hit 3241 N/A - Remote core, same node Any cache hit 1572 HitM - Remote core, same node Any cache hit 514 N/A - Remote node, same socket Any cache hit 1216 HitM - Remote node, same socket Any cache hit 350 N/A - Uncached hit 18 N/A +New output fields will show related access info together. For example: + + # perf mem report -F overhead,cache,snoop,comm + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # + # ---------- Cache ----------- --- Snoop ---- + # Overhead L1 L2 L1-buf Other HitM Other Command + # ........ ............................ .............. .......... + # + 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1 + 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make + 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc + 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as + 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh + 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld + 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config + 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git + 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm + ... + +Also, it can be aggregated based on various memory access info using the +sort keys. For example: + + # perf mem report -s mem,snoop + ... 
+ # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # Sort order : mem,snoop + # + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A Please refer to their man page for more detail. diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 965e73d37772..4d164836d094 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -119,6 +119,22 @@ REPORT OPTIONS And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat. +-F:: +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Please see linkperf:perf-report[1] for details. + + In addition to the default fields, 'perf mem report' will provide the + following fields to break down sample periods. + + - op: operation in the sample instruction (load, store, prefetch, ...) + - cache: location in CPU cache (L1, L2, ...) where the sample hit + - mem: location in memory or other places the sample hit + - dtlb: location in Data TLB (L1, L2) where the sample hit + - snoop: snoop result for the sampled data access + + Please take a look at the OUTPUT FIELD SELECTION section for caveats. + -T:: --type-profile:: Show data-type profile result instead of code symbols. This requires @@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20: 90% [k] memcpy 10% [.] strcmp +OUTPUT FIELD SELECTION +---------------------- +"perf mem report" adds a number of new output fields specific to data source +information in the sample. Some of them have the same name with the existing +sort keys ("mem" and "snoop"). 
So unlike other fields and sort keys, they'll +behave differently when it's used by -F/--fields or -s/--sort. + +Using those two as output fields will aggregate samples altogether and show +breakdown. + + $ perf mem report -F mem,snoop + ... + # ------ Memory ------- --- Snoop ---- + # RAM Uncach Other HitM Other + # ..................... .............. + # + 3.5% 0.0% 96.5% 25.1% 74.9% + +But using the same name for sort keys will aggregate samples for each type +separately. + + $ perf mem report -s mem,snoop + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1] diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fdf133c9520f..d2d6d7f3ea33 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,7 +18,6 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/prctl.h> #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 26382e4d8d4c..4c4fee107e59 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -2,11 +2,18 @@ #include <err.h> #include <stdio.h> #include <stdlib.h> -#include <linux/prctl.h> #include <sys/prctl.h> #include "futex.h" +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif // PR_FUTEX_HASH + void futex_set_nbuckets_param(struct bench_futex_parameters *params) { unsigned long flags; diff --git 
a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e9fab20e9330..8085e4d1d8af 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -186,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh index 5ec35c52b7d9..bf54bd6c3e2e 100755 --- a/tools/perf/tests/shell/stat+event_uniquifying.sh +++ b/tools/perf/tests/shell/stat+event_uniquifying.sh @@ -9,7 +9,8 @@ perf_tool=perf err=0 test_event_uniquifying() { - # We use `clockticks` to verify the uniquify behavior. + # We use `clockticks` in `uncore_imc` to verify the uniquify behavior. + pmu="uncore_imc" event="clockticks" # If the `-A` option is added, the event should be uniquified. @@ -43,11 +44,18 @@ test_event_uniquifying() { echo "stat event uniquifying test" uniquified_event_array=() + # Skip if the machine does not have `uncore_imc` device. + if ! ${perf_tool} list pmu | grep -q ${pmu}; then + echo "Target does not support PMU ${pmu} [Skipped]" + err=2 + return + fi + # Check how many uniquified events. 
while IFS= read -r line; do uniquified_event=$(echo "$line" | awk '{print $1}') uniquified_event_array+=("${uniquified_event}") - done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]") + done < <(${perf_tool} list -v ${event} | grep ${pmu}) perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true" $perf_command diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index 1d5759d08141..3a2a8438f9af 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd, continue; /* Skip scripts that have a separate driver. */ fd = openat(dir_fd, ent->d_name, O_PATH); append_scripts_in_dir(fd, result, result_sz); + close(fd); } for (i = 0; i < n_dirs; i++) /* Clean up */ zfree(&entlist[i]); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index e762e1af650c..0098b0ce8ccb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h 
b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 15c18ef4eb11..43dec6eed559 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -364,4 +364,11 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..89979ca23c3f 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a786cbfb0ff5..83aaf7cda635 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + 
evsel__close(evsel); evsel__delete(evsel); } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e2a2c46c008b..3d8378972d26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -test_sysctl xdping test_cpp *.d diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..910d8d6402ef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ - test_tcpnotify_user test_sysctl \ + test_tcpnotify_user \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -220,7 +220,7 @@ ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") endif -# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# Define simple and short `make test_progs`, `make test_maps`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. 
@@ -329,7 +329,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index bcdbd27f22f0..273dd41ca09e 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -1,22 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include <bpf/bpf_endian.h> -#include "bpf_util.h" +#include "test_progs.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -1608,26 +1594,19 @@ static int run_tests(int cgfd) return fails ? 
-1 : 0; } -int main(int argc, char **argv) +void test_sysctl(void) { - int cgfd = -1; - int err = 0; + int cgfd; cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup")) + goto out; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; out: close(cgfd); cleanup_cgroup_environment(); - return err; + return; } diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. 
+ */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index a7c0a553aa50..3e2d76ee8050 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file) return 0; } +SEC("lsm.s/inode_rename") +__success +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + if (!inode) + return 0; + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index d6d3f4fcb24c..4b392c6c8fc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. 
*/ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <linux/limits.h> @@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f) return 0; } +SEC("lsm.s/inode_rename") +__failure __msg("invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. 
*/ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * 
Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. */ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. 
*/ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. 
*/ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { @@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..68f8e479ac36 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -461,10 
+461,15 @@ TEST_F(coredump, socket_detect_userspace_client) _exit(EXIT_FAILURE); } + ret = read(fd_coredump, &c, 1); + close(fd_coredump); close(fd_server); close(fd_peer_pidfd); close(fd_core_file); + + if (ret < 1) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } self->pid_coredump_server = pid_coredump_server; diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index f439c434ba36..648ff50bc1c3 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -38,7 +38,7 @@ def test_rss_input_xfrm(cfg, ipver): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + {'header': {'dev-name': cfg.ifname}}).get('input-xfrm') # Check for symmetric xor/or-xor if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 1bb46ec435d4..7f32b5600925 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/lib.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c 
b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 20a9d3ecf743..a9ecfb2d3932 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) struct futex32_numa *futex_numa; int mem_size, i; void *futex_ptr; - char c; + int c; while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { @@ -210,6 +210,10 @@ int main(int argc, char *argv[]) ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0); if (ret == 0) { + ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno); + ksft_print_msg("Node %d test\n", i); futex_numa->futex = 0; futex_numa->numa = FUTEX_NO_NODE; @@ -220,8 +224,8 @@ int main(int argc, char *argv[]) if (0) test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { - ksft_test_result_fail("Returned NUMA node is %d expected %d\n", - futex_numa->numa, i); + ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); } } } diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 2dca18fefedc..24a92dc94eb8 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; int ret; - char c; + int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { switch (c) { diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | 
PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) @@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). 
+ */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? 
MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index b4d22b3ab7cc..4e71740a098b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -954,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -968,12 +970,20 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + gic_fd = vgic_v3_setup(*vm, 1, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + sync_global_to_guest(*vm, test_args); sync_global_to_guest(*vm, CVAL_MAX); sync_global_to_guest(*vm, DEF_CNT); } +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} + static void test_print_help(char *name) { pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" @@ -1060,13 +1070,13 @@ int main(int argc, char *argv[]) if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c 
index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index a28baa536332..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index bbae66fc5038..cc26480098ae 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_GE(fd, 0); ASSERT_EQ(ftruncate(fd, page_size), 0); - ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } memset(&attr, 0, attr_sz); attr.size = attr_sz; @@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_NE(mremap(ptr2, page_size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); +out: close(fd); remove(probe_file); } diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index b380e102b22f..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ 
b/tools/testing/selftests/mm/virtual_address_range.c @@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 532bb732bc6d..c6dd2a335cf4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -50,6 +50,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tfo timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab996bd22a5f..332f387615d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -110,6 +110,8 @@ TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off +TEST_GEN_FILES += tfo +TEST_PROGS += tfo_passive.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..b5f474969917 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - 
ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP 
sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 
1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -731,6 +861,8 @@ TEST_F(msg_oob, 
inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. # # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? 
"IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" diff 
--git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..86a216e9aca8 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -312,7 +312,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) 
= 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..eb3cac5e583c --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netinet/tcp.h> +#include <errno.h> + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + 
+ return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE *outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + read(connfd, buf, 64); + fprintf(outfile, "%d\n", opt); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + 
case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..80bf11fdc046 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! 
\ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? 
-ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} + +wait + +res=$(cat $out_file) +rm $out_file + +if [ $res -eq 0 ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..5c6851e8d311 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -635,5 +635,42 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", 
+ "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] } ] diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index 6eef282d569f..3ed4c9b2d8c0 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,22 +32,23 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc & ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi -wait if _have_feature "PER_IO_DAEMON"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index f703fcfe9f7c..83148875a12c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap 
sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include <err.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ucontext.h> + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. + */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. 
+ * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eec82775c5bf..222f0e894a0c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT); if (r) goto out_unlock; + + cond_resched(); } kvm_handle_gfn_range(kvm, &pre_set_range); @@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, r = xa_err(xa_store(&kvm->mem_attr_array, i, entry, GFP_KERNEL_ACCOUNT)); KVM_BUG_ON(r, kvm); + cond_resched(); } kvm_handle_gfn_range(kvm, &post_set_range); |