diff options
659 files changed, 16730 insertions, 5930 deletions
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index a5c28f606865..284e2dfa61cd 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -215,6 +215,17 @@ Description: Sets the skip reset on timeout option for the device. Value of "0" means device will be reset in case some CS has timed out, otherwise it will not be reset. +What: /sys/kernel/debug/habanalabs/hl<n>/state_dump +Date: Oct 2021 +KernelVersion: 5.15 +Contact: ynudelman@habana.ai +Description: Gets the state dump occurring on a CS timeout or failure. + State dump is used for debug and is created each time in case of + a problem in a CS execution, before reset. + Reading from the node returns the newest state dump available. + Writing an integer X discards X state dumps, so that the + next read would return X+1-st newest state dump. + What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err Date: Mar 2020 KernelVersion: 5.6 @@ -230,6 +241,14 @@ Description: Displays a list with information about the currently user pointers (user virtual addresses) that are pinned and mapped to DMA addresses +What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup +Date: Aug 2021 +KernelVersion: 5.15 +Contact: ogabbay@kernel.org +Description: Allows to search for specific user pointers (user virtual + addresses) that are pinned and mapped to DMA addresses, and see + their resolution to the specific dma address. + What: /sys/kernel/debug/habanalabs/hl<n>/vm Date: Jan 2019 KernelVersion: 5.1 diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 35314b63008c..caa3c09a5c3f 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -259,7 +259,7 @@ Configuring the kernel Compiling the kernel -------------------- - - Make sure you have at least gcc 4.9 available. + - Make sure you have at least gcc 5.1 available. For more information, refer to :ref:`Documentation/process/changes.rst <changes>`. Please note that you can still run a.out user programs with this kernel. diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index b66e3cae1472..c6f4ba2fb32d 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -2,12 +2,13 @@ CPU hotplug in the Kernel ========================= -:Date: December, 2016 +:Date: September, 2021 :Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>, - Rusty Russell <rusty@rustcorp.com.au>, - Srivatsa Vaddagiri <vatsa@in.ibm.com>, - Ashok Raj <ashok.raj@intel.com>, - Joel Schopp <jschopp@austin.ibm.com> + Rusty Russell <rusty@rustcorp.com.au>, + Srivatsa Vaddagiri <vatsa@in.ibm.com>, + Ashok Raj <ashok.raj@intel.com>, + Joel Schopp <jschopp@austin.ibm.com>, + Thomas Gleixner <tglx@linutronix.de> Introduction ============ @@ -158,100 +159,480 @@ at state ``CPUHP_OFFLINE``. This includes: * Once all services are migrated, kernel calls an arch specific routine ``__cpu_disable()`` to perform arch specific cleanup. -Using the hotplug API ---------------------- - -It is possible to receive notifications once a CPU is offline or onlined. This -might be important to certain drivers which need to perform some kind of setup -or clean up functions based on the number of available CPUs:: - - #include <linux/cpuhotplug.h> - - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online", - Y_online, Y_prepare_down); - -*X* is the subsystem and *Y* the particular driver. The *Y_online* callback -will be invoked during registration on all online CPUs. If an error -occurs during the online callback the *Y_prepare_down* callback will be -invoked on all CPUs on which the online callback was previously invoked. -After registration completed, the *Y_online* callback will be invoked -once a CPU is brought online and *Y_prepare_down* will be invoked when a -CPU is shutdown. All resources which were previously allocated in -*Y_online* should be released in *Y_prepare_down*. -The return value *ret* is negative if an error occurred during the -registration process. Otherwise a positive value is returned which -contains the allocated hotplug for dynamically allocated states -(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states. - -The callback can be remove by invoking ``cpuhp_remove_state()``. In case of a -dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state. -During the removal of a hotplug state the teardown callback will be invoked. - -Multiple instances -~~~~~~~~~~~~~~~~~~ - -If a driver has multiple instances and each instance needs to perform the -callback independently then it is likely that a ''multi-state'' should be used. -First a multi-state state needs to be registered:: - - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online, - Y_online, Y_prepare_down); - Y_hp_online = ret; - -The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()`` -except it prepares the callbacks for a multi state and does not invoke -the callbacks. This is a one time setup. -Once a new instance is allocated, you need to register this new instance:: - - ret = cpuhp_state_add_instance(Y_hp_online, &d->node); - -This function will add this instance to your previously allocated -*Y_hp_online* state and invoke the previously registered callback -(*Y_online*) on all online CPUs. The *node* element is a ``struct -hlist_node`` member of your per-instance data structure. - -On removal of the instance:: - - cpuhp_state_remove_instance(Y_hp_online, &d->node) - -should be invoked which will invoke the teardown callback on all online -CPUs. - -Manual setup -~~~~~~~~~~~~ - -Usually it is handy to invoke setup and teardown callbacks on registration or -removal of a state because usually the operation needs to performed once a CPU -goes online (offline) and during initial setup (shutdown) of the driver. However -each registration and removal function is also available with a ``_nocalls`` -suffix which does not invoke the provided callbacks if the invocation of the -callbacks is not desired. During the manual setup (or teardown) the functions -``cpus_read_lock()`` and ``cpus_read_unlock()`` should be used to inhibit CPU -hotplug operations. - - -The ordering of the events --------------------------- - -The hotplug states are defined in ``include/linux/cpuhotplug.h``: - -* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the - CPU is up. -* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked - just the after the CPU has been brought up. The interrupts are off and - the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE* - the callbacks are invoked on the target CPU. -* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are - reserved for the dynamic allocation. -* The states are invoked in the reverse order on CPU shutdown starting with - *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are - invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*. - -A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough. -However if an earlier invocation during the bring up or shutdown is required -then an explicit state should be acquired. An explicit state might also be -required if the hotplug event requires specific ordering in respect to -another hotplug event. + +The CPU hotplug API +=================== + +CPU hotplug state machine +------------------------- + +CPU hotplug uses a trivial state machine with a linear state space from +CPUHP_OFFLINE to CPUHP_ONLINE. Each state has a startup and a teardown +callback. + +When a CPU is onlined, the startup callbacks are invoked sequentially until +the state CPUHP_ONLINE is reached. They can also be invoked when the +callbacks of a state are set up or an instance is added to a multi-instance +state. + +When a CPU is offlined the teardown callbacks are invoked in the reverse +order sequentially until the state CPUHP_OFFLINE is reached. They can also +be invoked when the callbacks of a state are removed or an instance is +removed from a multi-instance state. + +If a usage site requires only a callback in one direction of the hotplug +operations (CPU online or CPU offline) then the other not-required callback +can be set to NULL when the state is set up. + +The state space is divided into three sections: + +* The PREPARE section + + The PREPARE section covers the state space from CPUHP_OFFLINE to + CPUHP_BRINGUP_CPU. + + The startup callbacks in this section are invoked before the CPU is + started during a CPU online operation. The teardown callbacks are invoked + after the CPU has become dysfunctional during a CPU offline operation. + + The callbacks are invoked on a control CPU as they can't obviously run on + the hotplugged CPU which is either not yet started or has become + dysfunctional already. + + The startup callbacks are used to setup resources which are required to + bring a CPU successfully online. The teardown callbacks are used to free + resources or to move pending work to an online CPU after the hotplugged + CPU became dysfunctional. + + The startup callbacks are allowed to fail. If a callback fails, the CPU + online operation is aborted and the CPU is brought down to the previous + state (usually CPUHP_OFFLINE) again. + + The teardown callbacks in this section are not allowed to fail. + +* The STARTING section + + The STARTING section covers the state space between CPUHP_BRINGUP_CPU + 1 + and CPUHP_AP_ONLINE. + + The startup callbacks in this section are invoked on the hotplugged CPU + with interrupts disabled during a CPU online operation in the early CPU + setup code. The teardown callbacks are invoked with interrupts disabled + on the hotplugged CPU during a CPU offline operation shortly before the + CPU is completely shut down. + + The callbacks in this section are not allowed to fail. + + The callbacks are used for low level hardware initialization/shutdown and + for core subsystems. + +* The ONLINE section + + The ONLINE section covers the state space between CPUHP_AP_ONLINE + 1 and + CPUHP_ONLINE. + + The startup callbacks in this section are invoked on the hotplugged CPU + during a CPU online operation. The teardown callbacks are invoked on the + hotplugged CPU during a CPU offline operation. + + The callbacks are invoked in the context of the per CPU hotplug thread, + which is pinned on the hotplugged CPU. The callbacks are invoked with + interrupts and preemption enabled. + + The callbacks are allowed to fail. When a callback fails the hotplug + operation is aborted and the CPU is brought back to the previous state. + +CPU online/offline operations +----------------------------- + +A successful online operation looks like this:: + + [CPUHP_OFFLINE] + [CPUHP_OFFLINE + 1]->startup() -> success + [CPUHP_OFFLINE + 2]->startup() -> success + [CPUHP_OFFLINE + 3] -> skipped because startup == NULL + ... + [CPUHP_BRINGUP_CPU]->startup() -> success + === End of PREPARE section + [CPUHP_BRINGUP_CPU + 1]->startup() -> success + ... + [CPUHP_AP_ONLINE]->startup() -> success + === End of STARTUP section + [CPUHP_AP_ONLINE + 1]->startup() -> success + ... + [CPUHP_ONLINE - 1]->startup() -> success + [CPUHP_ONLINE] + +A successful offline operation looks like this:: + + [CPUHP_ONLINE] + [CPUHP_ONLINE - 1]->teardown() -> success + ... + [CPUHP_AP_ONLINE + 1]->teardown() -> success + === Start of STARTUP section + [CPUHP_AP_ONLINE]->teardown() -> success + ... + [CPUHP_BRINGUP_ONLINE - 1]->teardown() + ... + === Start of PREPARE section + [CPUHP_BRINGUP_CPU]->teardown() + [CPUHP_OFFLINE + 3]->teardown() + [CPUHP_OFFLINE + 2] -> skipped because teardown == NULL + [CPUHP_OFFLINE + 1]->teardown() + [CPUHP_OFFLINE] + +A failed online operation looks like this:: + + [CPUHP_OFFLINE] + [CPUHP_OFFLINE + 1]->startup() -> success + [CPUHP_OFFLINE + 2]->startup() -> success + [CPUHP_OFFLINE + 3] -> skipped because startup == NULL + ... + [CPUHP_BRINGUP_CPU]->startup() -> success + === End of PREPARE section + [CPUHP_BRINGUP_CPU + 1]->startup() -> success + ... + [CPUHP_AP_ONLINE]->startup() -> success + === End of STARTUP section + [CPUHP_AP_ONLINE + 1]->startup() -> success + --- + [CPUHP_AP_ONLINE + N]->startup() -> fail + [CPUHP_AP_ONLINE + (N - 1)]->teardown() + ... + [CPUHP_AP_ONLINE + 1]->teardown() + === Start of STARTUP section + [CPUHP_AP_ONLINE]->teardown() + ... + [CPUHP_BRINGUP_ONLINE - 1]->teardown() + ... + === Start of PREPARE section + [CPUHP_BRINGUP_CPU]->teardown() + [CPUHP_OFFLINE + 3]->teardown() + [CPUHP_OFFLINE + 2] -> skipped because teardown == NULL + [CPUHP_OFFLINE + 1]->teardown() + [CPUHP_OFFLINE] + +A failed offline operation looks like this:: + + [CPUHP_ONLINE] + [CPUHP_ONLINE - 1]->teardown() -> success + ... + [CPUHP_ONLINE - N]->teardown() -> fail + [CPUHP_ONLINE - (N - 1)]->startup() + ... + [CPUHP_ONLINE - 1]->startup() + [CPUHP_ONLINE] + +Recursive failures cannot be handled sensibly. Look at the following +example of a recursive fail due to a failed offline operation: :: + + [CPUHP_ONLINE] + [CPUHP_ONLINE - 1]->teardown() -> success + ... + [CPUHP_ONLINE - N]->teardown() -> fail + [CPUHP_ONLINE - (N - 1)]->startup() -> success + [CPUHP_ONLINE - (N - 2)]->startup() -> fail + +The CPU hotplug state machine stops right here and does not try to go back +down again because that would likely result in an endless loop:: + + [CPUHP_ONLINE - (N - 1)]->teardown() -> success + [CPUHP_ONLINE - N]->teardown() -> fail + [CPUHP_ONLINE - (N - 1)]->startup() -> success + [CPUHP_ONLINE - (N - 2)]->startup() -> fail + [CPUHP_ONLINE - (N - 1)]->teardown() -> success + [CPUHP_ONLINE - N]->teardown() -> fail + +Lather, rinse and repeat. In this case the CPU left in state:: + + [CPUHP_ONLINE - (N - 1)] + +which at least lets the system make progress and gives the user a chance to +debug or even resolve the situation. + +Allocating a state +------------------ + +There are two ways to allocate a CPU hotplug state: + +* Static allocation + + Static allocation has to be used when the subsystem or driver has + ordering requirements versus other CPU hotplug states. E.g. the PERF core + startup callback has to be invoked before the PERF driver startup + callbacks during a CPU online operation. During a CPU offline operation + the driver teardown callbacks have to be invoked before the core teardown + callback. The statically allocated states are described by constants in + the cpuhp_state enum which can be found in include/linux/cpuhotplug.h. + + Insert the state into the enum at the proper place so the ordering + requirements are fulfilled. The state constant has to be used for state + setup and removal. + + Static allocation is also required when the state callbacks are not set + up at runtime and are part of the initializer of the CPU hotplug state + array in kernel/cpu.c. + +* Dynamic allocation + + When there are no ordering requirements for the state callbacks then + dynamic allocation is the preferred method. The state number is allocated + by the setup function and returned to the caller on success. + + Only the PREPARE and ONLINE sections provide a dynamic allocation + range. The STARTING section does not as most of the callbacks in that + section have explicit ordering requirements. + +Setup of a CPU hotplug state +---------------------------- + +The core code provides the following functions to setup a state: + +* cpuhp_setup_state(state, name, startup, teardown) +* cpuhp_setup_state_nocalls(state, name, startup, teardown) +* cpuhp_setup_state_cpuslocked(state, name, startup, teardown) +* cpuhp_setup_state_nocalls_cpuslocked(state, name, startup, teardown) + +For cases where a driver or a subsystem has multiple instances and the same +CPU hotplug state callbacks need to be invoked for each instance, the CPU +hotplug core provides multi-instance support. The advantage over driver +specific instance lists is that the instance related functions are fully +serialized against CPU hotplug operations and provide the automatic +invocations of the state callbacks on add and removal. To set up such a +multi-instance state the following function is available: + +* cpuhp_setup_state_multi(state, name, startup, teardown) + +The @state argument is either a statically allocated state or one of the +constants for dynamically allocated states - CPUHP_PREPARE_DYN, +CPUHP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for +which a dynamic state should be allocated. + +The @name argument is used for sysfs output and for instrumentation. The +naming convention is "subsys:mode" or "subsys/driver:mode", +e.g. "perf:mode" or "perf/x86:mode". The common mode names are: + +======== ======================================================= +prepare For states in the PREPARE section + +dead For states in the PREPARE section which do not provide + a startup callback + +starting For states in the STARTING section + +dying For states in the STARTING section which do not provide + a startup callback + +online For states in the ONLINE section + +offline For states in the ONLINE section which do not provide + a startup callback +======== ======================================================= + +As the @name argument is only used for sysfs and instrumentation other mode +descriptors can be used as well if they describe the nature of the state +better than the common ones. + +Examples for @name arguments: "perf/online", "perf/x86:prepare", +"RCU/tree:dying", "sched/waitempty" + +The @startup argument is a function pointer to the callback which should be +invoked during a CPU online operation. If the usage site does not require a +startup callback set the pointer to NULL. + +The @teardown argument is a function pointer to the callback which should +be invoked during a CPU offline operation. If the usage site does not +require a teardown callback set the pointer to NULL. + +The functions differ in the way how the installed callbacks are treated: + + * cpuhp_setup_state_nocalls(), cpuhp_setup_state_nocalls_cpuslocked() + and cpuhp_setup_state_multi() only install the callbacks + + * cpuhp_setup_state() and cpuhp_setup_state_cpuslocked() install the + callbacks and invoke the @startup callback (if not NULL) for all online + CPUs which have currently a state greater than the newly installed + state. Depending on the state section the callback is either invoked on + the current CPU (PREPARE section) or on each online CPU (ONLINE + section) in the context of the CPU's hotplug thread. + + If a callback fails for CPU N then the teardown callback for CPU + 0 .. N-1 is invoked to rollback the operation. The state setup fails, + the callbacks for the state are not installed and in case of dynamic + allocation the allocated state is freed. + +The state setup and the callback invocations are serialized against CPU +hotplug operations. If the setup function has to be called from a CPU +hotplug read locked region, then the _cpuslocked() variants have to be +used. These functions cannot be used from within CPU hotplug callbacks. + +The function return values: + ======== =================================================================== + 0 Statically allocated state was successfully set up + + >0 Dynamically allocated state was successfully set up. + + The returned number is the state number which was allocated. If + the state callbacks have to be removed later, e.g. module + removal, then this number has to be saved by the caller and used + as @state argument for the state remove function. For + multi-instance states the dynamically allocated state number is + also required as @state argument for the instance add/remove + operations. + + <0 Operation failed + ======== =================================================================== + +Removal of a CPU hotplug state +------------------------------ + +To remove a previously set up state, the following functions are provided: + +* cpuhp_remove_state(state) +* cpuhp_remove_state_nocalls(state) +* cpuhp_remove_state_nocalls_cpuslocked(state) +* cpuhp_remove_multi_state(state) + +The @state argument is either a statically allocated state or the state +number which was allocated in the dynamic range by cpuhp_setup_state*(). If +the state is in the dynamic range, then the state number is freed and +available for dynamic allocation again. + +The functions differ in the way how the installed callbacks are treated: + + * cpuhp_remove_state_nocalls(), cpuhp_remove_state_nocalls_cpuslocked() + and cpuhp_remove_multi_state() only remove the callbacks. + + * cpuhp_remove_state() removes the callbacks and invokes the teardown + callback (if not NULL) for all online CPUs which have currently a state + greater than the removed state. Depending on the state section the + callback is either invoked on the current CPU (PREPARE section) or on + each online CPU (ONLINE section) in the context of the CPU's hotplug + thread. + + In order to complete the removal, the teardown callback should not fail. + +The state removal and the callback invocations are serialized against CPU +hotplug operations. If the remove function has to be called from a CPU +hotplug read locked region, then the _cpuslocked() variants have to be +used. These functions cannot be used from within CPU hotplug callbacks. + +If a multi-instance state is removed then the caller has to remove all +instances first. + +Multi-Instance state instance management +---------------------------------------- + +Once the multi-instance state is set up, instances can be added to the +state: + + * cpuhp_state_add_instance(state, node) + * cpuhp_state_add_instance_nocalls(state, node) + +The @state argument is either a statically allocated state or the state +number which was allocated in the dynamic range by cpuhp_setup_state_multi(). + +The @node argument is a pointer to an hlist_node which is embedded in the +instance's data structure. The pointer is handed to the multi-instance +state callbacks and can be used by the callback to retrieve the instance +via container_of(). + +The functions differ in the way how the installed callbacks are treated: + + * cpuhp_state_add_instance_nocalls() and only adds the instance to the + multi-instance state's node list. + + * cpuhp_state_add_instance() adds the instance and invokes the startup + callback (if not NULL) associated with @state for all online CPUs which + have currently a state greater than @state. The callback is only + invoked for the to be added instance. Depending on the state section + the callback is either invoked on the current CPU (PREPARE section) or + on each online CPU (ONLINE section) in the context of the CPU's hotplug + thread. + + If a callback fails for CPU N then the teardown callback for CPU + 0 .. N-1 is invoked to rollback the operation, the function fails and + the instance is not added to the node list of the multi-instance state. + +To remove an instance from the state's node list these functions are +available: + + * cpuhp_state_remove_instance(state, node) + * cpuhp_state_remove_instance_nocalls(state, node) + +The arguments are the same as for the the cpuhp_state_add_instance*() +variants above. + +The functions differ in the way how the installed callbacks are treated: + + * cpuhp_state_remove_instance_nocalls() only removes the instance from the + state's node list. + + * cpuhp_state_remove_instance() removes the instance and invokes the + teardown callback (if not NULL) associated with @state for all online + CPUs which have currently a state greater than @state. The callback is + only invoked for the to be removed instance. Depending on the state + section the callback is either invoked on the current CPU (PREPARE + section) or on each online CPU (ONLINE section) in the context of the + CPU's hotplug thread. + + In order to complete the removal, the teardown callback should not fail. + +The node list add/remove operations and the callback invocations are +serialized against CPU hotplug operations. These functions cannot be used +from within CPU hotplug callbacks and CPU hotplug read locked regions. + +Examples +-------- + +Setup and teardown a statically allocated state in the STARTING section for +notifications on online and offline operations:: + + ret = cpuhp_setup_state(CPUHP_SUBSYS_STARTING, "subsys:starting", subsys_cpu_starting, subsys_cpu_dying); + if (ret < 0) + return ret; + .... + cpuhp_remove_state(CPUHP_SUBSYS_STARTING); + +Setup and teardown a dynamically allocated state in the ONLINE section +for notifications on offline operations:: + + state = cpuhp_setup_state(CPUHP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline); + if (state < 0) + return state; + .... + cpuhp_remove_state(state); + +Setup and teardown a dynamically allocated state in the ONLINE section +for notifications on online operations without invoking the callbacks:: + + state = cpuhp_setup_state_nocalls(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL); + if (state < 0) + return state; + .... + cpuhp_remove_state_nocalls(state); + +Setup, use and teardown a dynamically allocated multi-instance state in the +ONLINE section for notifications on online and offline operation:: + + state = cpuhp_setup_state_multi(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline); + if (state < 0) + return state; + .... + ret = cpuhp_state_add_instance(state, &inst1->node); + if (ret) + return ret; + .... + ret = cpuhp_state_add_instance(state, &inst2->node); + if (ret) + return ret; + .... + cpuhp_remove_instance(state, &inst1->node); + .... + cpuhp_remove_instance(state, &inst2->node); + .... + remove_multi_state(state); + Testing of hotplug states ========================= diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 2a7444e3a4c2..2e7186805148 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -315,6 +315,9 @@ Block Devices .. kernel-doc:: block/genhd.c :export: +.. kernel-doc:: block/bdev.c + :export: + Char devices ============ diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index b962fa6d649c..d79d36ac0c44 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -54,7 +54,7 @@ properties: - const: toradex,apalis_t30 - const: nvidia,tegra30 - items: - - const: toradex,apalis_t30-eval-v1.1 + - const: toradex,apalis_t30-v1.1-eval - const: toradex,apalis_t30-eval - const: toradex,apalis_t30-v1.1 - const: toradex,apalis_t30 diff --git a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml index 9222b06e93a0..fde07e4b119d 100644 --- a/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml +++ b/Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml @@ -12,7 +12,10 @@ maintainers: description: The Hitachi HD44780 Character LCD Controller is commonly used on character LCDs that can display one or more lines of text. It exposes an M6800 bus - interface, which can be used in either 4-bit or 8-bit mode. + interface, which can be used in either 4-bit or 8-bit mode. By using a + GPIO expander it is possible to use the driver with one of the popular I2C + expander boards based on the PCF8574 available for these displays. For + an example see below. properties: compatible: @@ -94,3 +97,29 @@ examples: display-height-chars = <2>; display-width-chars = <16>; }; + - | + #include <dt-bindings/gpio/gpio.h> + i2c { + #address-cells = <1>; + #size-cells = <0>; + + pcf8574: pcf8574@27 { + compatible = "nxp,pcf8574"; + reg = <0x27>; + gpio-controller; + #gpio-cells = <2>; + }; + }; + hd44780 { + compatible = "hit,hd44780"; + display-height-chars = <2>; + display-width-chars = <16>; + data-gpios = <&pcf8574 4 0>, + <&pcf8574 5 0>, + <&pcf8574 6 0>, + <&pcf8574 7 0>; + enable-gpios = <&pcf8574 2 0>; + rs-gpios = <&pcf8574 0 0>; + rw-gpios = <&pcf8574 1 0>; + backlight-gpios = <&pcf8574 3 0>; + }; diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt index fbb59c9ddda6..78044c340e20 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt @@ -9,7 +9,7 @@ function block. All DISP device tree nodes must be siblings to the central MMSYS_CONFIG node. For a description of the MMSYS_CONFIG binding, see -Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt. +Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.yaml. DISP function blocks ==================== diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml index 4265399bb154..c851770bbdf2 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml @@ -14,10 +14,10 @@ allOf: properties: compatible: - oneOf: - - const: qcom,dsi-phy-7nm - - const: qcom,dsi-phy-7nm-8150 - - const: qcom,sc7280-dsi-phy-7nm + enum: + - qcom,dsi-phy-7nm + - qcom,dsi-phy-7nm-8150 + - qcom,sc7280-dsi-phy-7nm reg: items: diff --git a/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml b/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml new file mode 100644 index 000000000000..601d85754577 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/gpio-virtio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtio GPIO controller + +maintainers: + - Viresh Kumar <viresh.kumar@linaro.org> + +allOf: + - $ref: /schemas/virtio/virtio-device.yaml# + +description: + Virtio GPIO controller, see /schemas/virtio/virtio-device.yaml for more + details. + +properties: + $nodename: + const: gpio + + compatible: + const: virtio,device29 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + interrupt-controller: true + + "#interrupt-cells": + const: 2 + +required: + - compatible + - gpio-controller + - "#gpio-cells" + +unevaluatedProperties: false + +examples: + - | + virtio@3000 { + compatible = "virtio,mmio"; + reg = <0x3000 0x100>; + interrupts = <41>; + + gpio { + compatible = "virtio,device29"; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml b/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml new file mode 100644 index 000000000000..7d87ed855301 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/i2c-virtio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtio I2C Adapter + +maintainers: + - Viresh Kumar <viresh.kumar@linaro.org> + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + - $ref: /schemas/virtio/virtio-device.yaml# + +description: + Virtio I2C device, see /schemas/virtio/virtio-device.yaml for more details. + +properties: + $nodename: + const: i2c + + compatible: + const: virtio,device22 + +required: + - compatible + +unevaluatedProperties: false + +examples: + - | + virtio@3000 { + compatible = "virtio,mmio"; + reg = <0x3000 0x100>; + interrupts = <41>; + + i2c { + compatible = "virtio,device22"; + + #address-cells = <1>; + #size-cells = <0>; + + light-sensor@20 { + compatible = "dynaimage,al3320a"; + reg = <0x20>; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml index cffd02028d02..d74f2002409e 100644 --- a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml +++ b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml @@ -29,6 +29,8 @@ properties: description: Regulator for the LRADC reference voltage + wakeup-source: true + patternProperties: "^button-[0-9]+$": type: object diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt deleted file mode 100644 index 6cd08bca2c66..000000000000 --- a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt +++ /dev/null @@ -1,55 +0,0 @@ -Qualcomm PM8941 PMIC Power Key - -PROPERTIES - -- compatible: - Usage: required - Value type: <string> - Definition: must be one of: - "qcom,pm8941-pwrkey" - "qcom,pm8941-resin" - "qcom,pmk8350-pwrkey" - "qcom,pmk8350-resin" - -- reg: - Usage: required - Value type: <prop-encoded-array> - Definition: base address of registers for block - -- interrupts: - Usage: required - Value type: <prop-encoded-array> - Definition: key change interrupt; The format of the specifier is - defined by the binding document describing the node's - interrupt parent. - -- debounce: - Usage: optional - Value type: <u32> - Definition: time in microseconds that key must be pressed or released - for state change interrupt to trigger. - -- bias-pull-up: - Usage: optional - Value type: <empty> - Definition: presence of this property indicates that the KPDPWR_N pin - should be configured for pull up. - -- linux,code: - Usage: optional - Value type: <u32> - Definition: The input key-code associated with the power key. - Use the linux event codes defined in - include/dt-bindings/input/linux-event-codes.h - When property is omitted KEY_POWER is assumed. - -EXAMPLE - - pwrkey@800 { - compatible = "qcom,pm8941-pwrkey"; - reg = <0x800>; - interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>; - debounce = <15625>; - bias-pull-up; - linux,code = <KEY_POWER>; - }; diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml new file mode 100644 index 000000000000..62314a5fdce5 --- /dev/null +++ b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/qcom,pm8941-pwrkey.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm PM8941 PMIC Power Key + +maintainers: + - Courtney Cavin <courtney.cavin@sonymobile.com> + - Vinod Koul <vkoul@kernel.org> + +allOf: + - $ref: input.yaml# + +properties: + compatible: + enum: + - qcom,pm8941-pwrkey + - qcom,pm8941-resin + - qcom,pmk8350-pwrkey + - qcom,pmk8350-resin + + interrupts: + maxItems: 1 + + debounce: + description: | + Time in microseconds that key must be pressed or + released for state change interrupt to trigger. + $ref: /schemas/types.yaml#/definitions/uint32 + + bias-pull-up: + description: | + Presence of this property indicates that the KPDPWR_N + pin should be configured for pull up. + $ref: /schemas/types.yaml#/definitions/flag + + linux,code: + description: | + The input key-code associated with the power key. + Use the linux event codes defined in + include/dt-bindings/input/linux-event-codes.h + When property is omitted KEY_POWER is assumed. + +required: + - compatible + - interrupts + +unevaluatedProperties: false +... diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.txt b/Documentation/devicetree/bindings/input/regulator-haptic.txt deleted file mode 100644 index 3ed1c7eb2f97..000000000000 --- a/Documentation/devicetree/bindings/input/regulator-haptic.txt +++ /dev/null @@ -1,21 +0,0 @@ -* Regulator Haptic Device Tree Bindings - -Required Properties: - - compatible : Should be "regulator-haptic" - - haptic-supply : Power supply to the haptic motor. - [*] refer Documentation/devicetree/bindings/regulator/regulator.txt - - - max-microvolt : The maximum voltage value supplied to the haptic motor. - [The unit of the voltage is a micro] - - - min-microvolt : The minimum voltage value supplied to the haptic motor. - [The unit of the voltage is a micro] - -Example: - - haptics { - compatible = "regulator-haptic"; - haptic-supply = <&motor_regulator>; - max-microvolt = <2700000>; - min-microvolt = <1100000>; - }; diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.yaml b/Documentation/devicetree/bindings/input/regulator-haptic.yaml new file mode 100644 index 000000000000..b1ae72f9cd2d --- /dev/null +++ b/Documentation/devicetree/bindings/input/regulator-haptic.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/input/regulator-haptic.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Regulator Haptic Device Tree Bindings + +maintainers: + - Jaewon Kim <jaewon02.kim@samsung.com> + +properties: + compatible: + const: regulator-haptic + + haptic-supply: + description: > + Power supply to the haptic motor + + max-microvolt: + description: > + The maximum voltage value supplied to the haptic motor + + min-microvolt: + description: > + The minimum voltage value supplied to the haptic motor + +required: + - compatible + - haptic-supply + - max-microvolt + - min-microvolt + +additionalProperties: false + +examples: + - | + haptics { + compatible = "regulator-haptic"; + haptic-supply = <&motor_regulator>; + max-microvolt = <2700000>; + min-microvolt = <1100000>; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml new file mode 100644 index 000000000000..9df685bdc5db --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings + +maintainers: + - Dmitry Torokhov <dmitry.torokhov@gmail.com> + +allOf: + - $ref: touchscreen.yaml# + +properties: + compatible: + const: chipone,icn8318 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + wake-gpios: + maxItems: 1 + +unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - wake-gpios + - touchscreen-size-x + - touchscreen-size-y + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@40 { + compatible = "chipone,icn8318"; + reg = <0x40>; + interrupt-parent = <&pio>; + interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */ + pinctrl-names = "default"; + pinctrl-0 = <&ts_wake_pin_p66>; + wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */ + touchscreen-size-x = <800>; + touchscreen-size-y = <480>; + touchscreen-inverted-x; + touchscreen-swapped-x-y; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt b/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt deleted file mode 100644 index 38b0603f65f3..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt +++ /dev/null @@ -1,44 +0,0 @@ -* ChipOne icn8318 I2C touchscreen controller - -Required properties: - - compatible : "chipone,icn8318" - - reg : I2C slave address of the chip (0x40) - - interrupts : interrupt specification for the icn8318 interrupt - - wake-gpios : GPIO specification for the WAKE input - - touchscreen-size-x : horizontal resolution of touchscreen (in pixels) - - touchscreen-size-y : vertical resolution of touchscreen (in pixels) - -Optional properties: - - pinctrl-names : should be "default" - - pinctrl-0: : a phandle pointing to the pin settings for the - control gpios - - touchscreen-fuzz-x : horizontal noise value of the absolute input - device (in pixels) - - touchscreen-fuzz-y : vertical noise value of the absolute input - device (in pixels) - - touchscreen-inverted-x : X axis is inverted (boolean) - - touchscreen-inverted-y : Y axis is inverted (boolean) - - touchscreen-swapped-x-y : X and Y axis are swapped (boolean) - Swapping is done after inverting the axis - -Example: - -i2c@00000000 { - /* ... */ - - chipone_icn8318@40 { - compatible = "chipone,icn8318"; - reg = <0x40>; - interrupt-parent = <&pio>; - interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */ - pinctrl-names = "default"; - pinctrl-0 = <&ts_wake_pin_p66>; - wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */ - touchscreen-size-x = <800>; - touchscreen-size-y = <480>; - touchscreen-inverted-x; - touchscreen-swapped-x-y; - }; - - /* ... */ -}; diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml new file mode 100644 index 000000000000..f9998edbff70 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Pixcir Touchscreen Controller Device Tree Bindings + +maintainers: + - Dmitry Torokhov <dmitry.torokhov@gmail.com> + +allOf: + - $ref: touchscreen.yaml# + +properties: + compatible: + enum: + - pixcir,pixcir_ts + - pixcir,pixcir_tangoc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + attb-gpio: + maxItems: 1 + + reset-gpios: + maxItems: 1 + + enable-gpios: + maxItems: 1 + + wake-gpios: + maxItems: 1 + +unevaluatedProperties: false + +required: + - compatible + - reg + - interrupts + - attb-gpio + - touchscreen-size-x + - touchscreen-size-y + +examples: + - | + #include <dt-bindings/gpio/gpio.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + touchscreen@5c { + compatible = "pixcir,pixcir_ts"; + reg = <0x5c>; + interrupts = <2 0>; + attb-gpio = <&gpf 2 0 2>; + touchscreen-size-x = <800>; + touchscreen-size-y = <600>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt deleted file mode 100644 index 697a3e7831e7..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt +++ /dev/null @@ -1,31 +0,0 @@ -* Pixcir I2C touchscreen controllers - -Required properties: -- compatible: must be "pixcir,pixcir_ts" or "pixcir,pixcir_tangoc" -- reg: I2C address of the chip -- interrupts: interrupt to which the chip is connected -- attb-gpio: GPIO connected to the ATTB line of the chip -- touchscreen-size-x: horizontal resolution of touchscreen (in pixels) -- touchscreen-size-y: vertical resolution of touchscreen (in pixels) - -Optional properties: -- reset-gpios: GPIO connected to the RESET line of the chip -- enable-gpios: GPIO connected to the ENABLE line of the chip -- wake-gpios: GPIO connected to the WAKE line of the chip - -Example: - - i2c@00000000 { - /* ... */ - - pixcir_ts@5c { - compatible = "pixcir,pixcir_ts"; - reg = <0x5c>; - interrupts = <2 0>; - attb-gpio = <&gpf 2 0 2>; - touchscreen-size-x = <800>; - touchscreen-size-y = <600>; - }; - - /* ... */ - }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml new file mode 100644 index 000000000000..938aab016cc2 --- /dev/null +++ b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings + +maintainers: + - Marek Vasut <marex@denx.de> + - Michael Welling <mwelling@ieee.org> + +properties: + $nodename: + pattern: "^touchscreen(@.*)?$" + + compatible: + enum: + - ti,tsc2004 + - ti,tsc2005 + + reg: + maxItems: 1 + description: | + I2C address when used on the I2C bus, or the SPI chip select index + when used on the SPI bus + + interrupts: + maxItems: 1 + + reset-gpios: + maxItems: 1 + description: GPIO specifier for the controller reset line + + spi-max-frequency: + description: TSC2005 SPI bus clock frequency. + maximum: 25000000 + + ti,x-plate-ohms: + description: resistance of the touchscreen's X plates in ohm (defaults to 280) + + ti,esd-recovery-timeout-ms: + description: | + if the touchscreen does not respond after the configured time + (in milli seconds), the driver will reset it. This is disabled + by default. + + vio-supply: + description: Regulator specifier + + touchscreen-fuzz-pressure: true + touchscreen-fuzz-x: true + touchscreen-fuzz-y: true + touchscreen-max-pressure: true + touchscreen-size-x: true + touchscreen-size-y: true + +allOf: + - $ref: touchscreen.yaml# + - if: + properties: + compatible: + contains: + const: ti,tsc2004 + then: + properties: + spi-max-frequency: false + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/gpio/gpio.h> + i2c { + #address-cells = <1>; + #size-cells = <0>; + touchscreen@48 { + compatible = "ti,tsc2004"; + reg = <0x48>; + vio-supply = <&vio>; + + reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; + interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>; + + touchscreen-fuzz-x = <4>; + touchscreen-fuzz-y = <7>; + touchscreen-fuzz-pressure = <2>; + touchscreen-size-x = <4096>; + touchscreen-size-y = <4096>; + touchscreen-max-pressure = <2048>; + + ti,x-plate-ohms = <280>; + ti,esd-recovery-timeout-ms = <8000>; + }; + }; + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/gpio/gpio.h> + spi { + #address-cells = <1>; + #size-cells = <0>; + touchscreen@0 { + compatible = "ti,tsc2005"; + spi-max-frequency = <6000000>; + reg = <0>; + + vio-supply = <&vio>; + + reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */ + interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */ + + touchscreen-fuzz-x = <4>; + touchscreen-fuzz-y = <7>; + touchscreen-fuzz-pressure = <2>; + touchscreen-size-x = <4096>; + touchscreen-size-y = <4096>; + touchscreen-max-pressure = <2048>; + + ti,x-plate-ohms = <280>; + ti,esd-recovery-timeout-ms = <8000>; + }; + }; diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt deleted file mode 100644 index b80c04b0e5c0..000000000000 --- a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt +++ /dev/null @@ -1,64 +0,0 @@ -* Texas Instruments tsc2004 and tsc2005 touchscreen controllers - -Required properties: - - compatible : "ti,tsc2004" or "ti,tsc2005" - - reg : Device address - - interrupts : IRQ specifier - - spi-max-frequency : Maximum SPI clocking speed of the device - (for tsc2005) - -Optional properties: - - vio-supply : Regulator specifier - - reset-gpios : GPIO specifier for the controller reset line - - ti,x-plate-ohms : integer, resistance of the touchscreen's X plates - in ohm (defaults to 280) - - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after - the configured time (in milli seconds), the driver - will reset it. This is disabled by default. - - properties defined in touchscreen.txt - -Example: - -&i2c3 { - tsc2004@48 { - compatible = "ti,tsc2004"; - reg = <0x48>; - vio-supply = <&vio>; - - reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; - interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>; - - touchscreen-fuzz-x = <4>; - touchscreen-fuzz-y = <7>; - touchscreen-fuzz-pressure = <2>; - touchscreen-size-x = <4096>; - touchscreen-size-y = <4096>; - touchscreen-max-pressure = <2048>; - - ti,x-plate-ohms = <280>; - ti,esd-recovery-timeout-ms = <8000>; - }; -} - -&mcspi1 { - tsc2005@0 { - compatible = "ti,tsc2005"; - spi-max-frequency = <6000000>; - reg = <0>; - - vio-supply = <&vio>; - - reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */ - interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */ - - touchscreen-fuzz-x = <4>; - touchscreen-fuzz-y = <7>; - touchscreen-fuzz-pressure = <2>; - touchscreen-size-x = <4096>; - touchscreen-size-y = <4096>; - touchscreen-max-pressure = <2048>; - - ti,x-plate-ohms = <280>; - ti,esd-recovery-timeout-ms = <8000>; - }; -} diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml index 7f2578d48e3f..9eb4bb529ad5 100644 --- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml @@ -19,7 +19,9 @@ properties: - const: allwinner,sun8i-v3s-emac - const: allwinner,sun50i-a64-emac - items: - - const: allwinner,sun50i-h6-emac + - enum: + - allwinner,sun20i-d1-emac + - allwinner,sun50i-h6-emac - const: allwinner,sun50i-a64-emac reg: diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt b/Documentation/devicetree/bindings/power/reset/qcom,pon.txt deleted file mode 100644 index 0c0dc3a1e693..000000000000 --- a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt +++ /dev/null @@ -1,49 +0,0 @@ -Qualcomm PON Device - -The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey -and resin along with the Android reboot-mode. - -This DT node has pwrkey and resin as sub nodes. - -Required Properties: --compatible: Must be one of: - "qcom,pm8916-pon" - "qcom,pms405-pon" - "qcom,pm8998-pon" - --reg: Specifies the physical address of the pon register - -Optional subnode: --pwrkey: Specifies the subnode pwrkey and should follow the - qcom,pm8941-pwrkey.txt description. --resin: Specifies the subnode resin and should follow the - qcom,pm8xxx-pwrkey.txt description. - -The rest of the properties should follow the generic reboot-mode description -found in reboot-mode.txt - -Example: - - pon@800 { - compatible = "qcom,pm8916-pon"; - - reg = <0x800>; - mode-bootloader = <0x2>; - mode-recovery = <0x1>; - - pwrkey { - compatible = "qcom,pm8941-pwrkey"; - interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>; - debounce = <15625>; - bias-pull-up; - linux,code = <KEY_POWER>; - }; - - resin { - compatible = "qcom,pm8941-resin"; - interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>; - debounce = <15625>; - bias-pull-up; - linux,code = <KEY_VOLUMEDOWN>; - }; - }; diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml new file mode 100644 index 000000000000..353f155df0f4 --- /dev/null +++ b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/power/reset/qcom,pon.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm PON Device + +maintainers: + - Vinod Koul <vkoul@kernel.org> + +description: | + The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey + and resin along with the Android reboot-mode. + + This DT node has pwrkey and resin as sub nodes. + +allOf: + - $ref: reboot-mode.yaml# + +properties: + compatible: + enum: + - qcom,pm8916-pon + - qcom,pms405-pon + - qcom,pm8998-pon + + reg: + maxItems: 1 + + pwrkey: + type: object + $ref: "../../input/qcom,pm8941-pwrkey.yaml#" + + resin: + type: object + $ref: "../../input/qcom,pm8941-pwrkey.yaml#" + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/input/linux-event-codes.h> + #include <dt-bindings/spmi/spmi.h> + spmi_bus: spmi@c440000 { + reg = <0x0c440000 0x1100>; + #address-cells = <2>; + #size-cells = <0>; + pmk8350: pmic@0 { + reg = <0x0 SPMI_USID>; + #address-cells = <1>; + #size-cells = <0>; + pmk8350_pon: pon_hlos@1300 { + reg = <0x1300>; + compatible = "qcom,pm8998-pon"; + + pwrkey { + compatible = "qcom,pm8941-pwrkey"; + interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >; + debounce = <15625>; + bias-pull-up; + linux,code = <KEY_POWER>; + }; + + resin { + compatible = "qcom,pm8941-resin"; + interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>; + debounce = <15625>; + bias-pull-up; + linux,code = <KEY_VOLUMEDOWN>; + }; + }; + }; + }; +... diff --git a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml index 9c6fda6b1dd9..ad0a0b95cec1 100644 --- a/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml +++ b/Documentation/devicetree/bindings/power/reset/reboot-mode.yaml @@ -36,7 +36,7 @@ patternProperties: "^mode-.*$": $ref: /schemas/types.yaml#/definitions/uint32 -additionalProperties: false +additionalProperties: true examples: - | diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml index 5596bee70509..81a54a4e8e3e 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml +++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml @@ -29,6 +29,7 @@ properties: - enum: - rockchip,px30-pwm - rockchip,rk3308-pwm + - rockchip,rk3568-pwm - const: rockchip,rk3328-pwm reg: diff --git a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml index 7548d8714871..13925bb78ec7 100644 --- a/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/trivial-rtc.yaml @@ -32,6 +32,9 @@ properties: - dallas,ds3232 # I2C-BUS INTERFACE REAL TIME CLOCK MODULE - epson,rx8010 + # I2C-BUS INTERFACE REAL TIME CLOCK MODULE + - epson,rx8025 + - epson,rx8035 # I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM - epson,rx8571 # I2C-BUS INTERFACE REAL TIME CLOCK MODULE diff --git a/Documentation/devicetree/bindings/spi/omap-spi.yaml b/Documentation/devicetree/bindings/spi/omap-spi.yaml index e55538186cf6..9952199cae11 100644 --- a/Documentation/devicetree/bindings/spi/omap-spi.yaml +++ b/Documentation/devicetree/bindings/spi/omap-spi.yaml @@ -84,9 +84,9 @@ unevaluatedProperties: false if: properties: compatible: - oneOf: - - const: ti,omap2-mcspi - - const: ti,omap4-mcspi + enum: + - ti,omap2-mcspi + - ti,omap4-mcspi then: properties: diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.yaml b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml index 593f7693bace..03e5dca7e933 100644 --- a/Documentation/devicetree/bindings/spi/spi-xilinx.yaml +++ b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml @@ -27,13 +27,11 @@ properties: xlnx,num-ss-bits: description: Number of chip selects used. - $ref: /schemas/types.yaml#/definitions/uint32 minimum: 1 maximum: 32 xlnx,num-transfer-bits: description: Number of bits per transfer. This will be 8 if not specified. - $ref: /schemas/types.yaml#/definitions/uint32 enum: [8, 16, 32] default: 8 diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml new file mode 100644 index 000000000000..289e9a845600 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2021 Linaro Ltd. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/thermal/qcom-lmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Limits Management Hardware(LMh) + +maintainers: + - Thara Gopinath <thara.gopinath@linaro.org> + +description: + Limits Management Hardware(LMh) is a hardware infrastructure on some + Qualcomm SoCs that can enforce temperature and current limits as + programmed by software for certain IPs like CPU. + +properties: + compatible: + enum: + - qcom,sdm845-lmh + + reg: + items: + - description: core registers + + interrupts: + maxItems: 1 + + '#interrupt-cells': + const: 1 + + interrupt-controller: true + + cpus: + description: + phandle of the first cpu in the LMh cluster + $ref: /schemas/types.yaml#/definitions/phandle + + qcom,lmh-temp-arm-millicelsius: + description: + An integer expressing temperature threshold at which the LMh thermal + FSM is engaged. + + qcom,lmh-temp-low-millicelsius: + description: + An integer expressing temperature threshold at which the state machine + will attempt to remove frequency throttling. + + qcom,lmh-temp-high-millicelsius: + description: + An integer expressing temperature threshold at which the state machine + will attempt to throttle the frequency. + +required: + - compatible + - reg + - interrupts + - '#interrupt-cells' + - interrupt-controller + - cpus + - qcom,lmh-temp-arm-millicelsius + - qcom,lmh-temp-low-millicelsius + - qcom,lmh-temp-high-millicelsius + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + + lmh@17d70800 { + compatible = "qcom,sdm845-lmh"; + reg = <0x17d70800 0x400>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; + cpus = <&CPU4>; + qcom,lmh-temp-arm-millicelsius = <65000>; + qcom,lmh-temp-low-millicelsius = <94500>; + qcom,lmh-temp-high-millicelsius = <95000>; + interrupt-controller; + #interrupt-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 164f71598c59..a07de5ed0ca6 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -215,7 +215,7 @@ patternProperties: - polling-delay - polling-delay-passive - thermal-sensors - - trips + additionalProperties: false additionalProperties: false diff --git a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml new file mode 100644 index 000000000000..b9ca8ef4f2be --- /dev/null +++ b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ufs/samsung,exynos-ufs.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung SoC series UFS host controller Device Tree Bindings + +maintainers: + - Alim Akhtar <alim.akhtar@samsung.com> + +description: | + Each Samsung UFS host controller instance should have its own node. + This binding define Samsung specific binding other then what is used + in the common ufshcd bindings + [1] Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt + +properties: + + compatible: + enum: + - samsung,exynos7-ufs + + reg: + items: + - description: HCI register + - description: vendor specific register + - description: unipro register + - description: UFS protector register + + reg-names: + items: + - const: hci + - const: vs_hci + - const: unipro + - const: ufsp + + clocks: + items: + - description: ufs link core clock + - description: unipro main clock + + clock-names: + items: + - const: core_clk + - const: sclk_unipro_main + + interrupts: + maxItems: 1 + + phys: + maxItems: 1 + + phy-names: + const: ufs-phy + +required: + - compatible + - reg + - interrupts + - phys + - phy-names + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/clock/exynos7-clk.h> + + ufs: ufs@15570000 { + compatible = "samsung,exynos7-ufs"; + reg = <0x15570000 0x100>, + <0x15570100 0x100>, + <0x15571000 0x200>, + <0x15572000 0x300>; + reg-names = "hci", "vs_hci", "unipro", "ufsp"; + interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clock_fsys1 ACLK_UFS20_LINK>, + <&clock_fsys1 SCLK_UFSUNIPRO20_USER>; + clock-names = "core_clk", "sclk_unipro_main"; + pinctrl-names = "default"; + pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>; + phys = <&ufs_phy>; + phy-names = "ufs-phy"; + }; +... diff --git a/Documentation/devicetree/bindings/virtio/mmio.yaml b/Documentation/devicetree/bindings/virtio/mmio.yaml index d46597028cf1..4b7a0273181c 100644 --- a/Documentation/devicetree/bindings/virtio/mmio.yaml +++ b/Documentation/devicetree/bindings/virtio/mmio.yaml @@ -36,7 +36,8 @@ required: - reg - interrupts -additionalProperties: false +additionalProperties: + type: object examples: - | diff --git a/Documentation/devicetree/bindings/virtio/virtio-device.yaml b/Documentation/devicetree/bindings/virtio/virtio-device.yaml new file mode 100644 index 000000000000..1778ea9b5aa5 --- /dev/null +++ b/Documentation/devicetree/bindings/virtio/virtio-device.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/virtio/virtio-device.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtio device bindings + +maintainers: + - Viresh Kumar <viresh.kumar@linaro.org> + +description: + These bindings are applicable to virtio devices irrespective of the bus they + are bound to, like mmio or pci. + +# We need a select here so we don't match all nodes with 'virtio,mmio' +properties: + compatible: + pattern: "^virtio,device[0-9a-f]{1,8}$" + description: Virtio device nodes. + "virtio,deviceID", where ID is the virtio device id. The textual + representation of ID shall be in lower case hexadecimal with leading + zeroes suppressed. + +required: + - compatible + +additionalProperties: true + +examples: + - | + virtio@3000 { + compatible = "virtio,mmio"; + reg = <0x3000 0x100>; + interrupts = <43>; + + i2c { + compatible = "virtio,device22"; + }; + }; +... diff --git a/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml index f2105eedac2c..ab9641e845db 100644 --- a/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml +++ b/Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml @@ -15,13 +15,13 @@ maintainers: properties: compatible: - oneOf: - - const: maxim,max6369 - - const: maxim,max6370 - - const: maxim,max6371 - - const: maxim,max6372 - - const: maxim,max6373 - - const: maxim,max6374 + enum: + - maxim,max6369 + - maxim,max6370 + - maxim,max6371 + - maxim,max6372 + - maxim,max6373 + - maxim,max6374 reg: description: This is a 1-byte memory-mapped address diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt index 99cb6d7f5005..2949c99fbb2f 100644 --- a/Documentation/features/vm/ELF-ASLR/arch-support.txt +++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt @@ -22,7 +22,7 @@ | openrisc: | TODO | | parisc: | ok | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | TODO | | sparc: | TODO | diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst index 7e5c04c98619..98db2ea5fa12 100644 --- a/Documentation/filesystems/api-summary.rst +++ b/Documentation/filesystems/api-summary.rst @@ -71,9 +71,6 @@ Other Functions .. kernel-doc:: fs/fs-writeback.c :export: -.. kernel-doc:: fs/block_dev.c - :export: - .. kernel-doc:: fs/anon_inodes.c :export: diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index d5a73fa2c9ef..8126beadc7df 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -37,7 +37,7 @@ TTM initialization This section is outdated. Drivers wishing to support TTM must pass a filled :c:type:`ttm_bo_driver -<ttm_bo_driver>` structure to ttm_bo_device_init, together with an +<ttm_bo_driver>` structure to ttm_device_init, together with an initialized global reference to the memory manager. The ttm_bo_driver structure contains several fields with function pointers for initializing the TTM, allocating and freeing memory, waiting for command diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index 564caeebe2b2..29b1bae0cf00 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -296,7 +296,7 @@ not available. Device Tree bindings and board design ===================================== -This section references ``Documentation/devicetree/bindings/net/dsa/sja1105.txt`` +This section references ``Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml`` and aims to showcase some potential switch caveats. RMII PHY role and out-of-band signaling diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst index 60ac091d3b0d..8a2788afe89b 100644 --- a/Documentation/power/energy-model.rst +++ b/Documentation/power/energy-model.rst @@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use the same scale. If there are different scales, these subsystems might decide to: return warning/error, stop working or panic. See Section 3. for an example of driver implementing this -callback, and kernel/power/energy_model.c for further documentation on this -API. +callback, or Section 2.4 for further documentation on this API 2.3 Accessing performance domains @@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil CPUfreq governor is in use in case of CPU device. Currently this calculation is not provided for other type of devices. -More details about the above APIs can be found in include/linux/energy_model.h. +More details about the above APIs can be found in ``<linux/energy_model.h>`` +or in Section 2.4 + + +2.4 Description details of this API +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. kernel-doc:: include/linux/energy_model.h + :internal: + +.. kernel-doc:: kernel/power/energy_model.c + :export: 3. Example driver diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index d3a8557b66a1..e35ab74a0f80 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -29,7 +29,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== -GNU C 4.9 gcc --version +GNU C 5.1 gcc --version Clang/LLVM (optional) 10.0.1 clang --version GNU make 3.81 make --version binutils 2.23 ld -v diff --git a/Documentation/translations/zh_CN/admin-guide/README.rst b/Documentation/translations/zh_CN/admin-guide/README.rst index 669a022f6817..980eb20521cf 100644 --- a/Documentation/translations/zh_CN/admin-guide/README.rst +++ b/Documentation/translations/zh_CN/admin-guide/README.rst @@ -223,7 +223,7 @@ Linux内核5.x版本 <http://kernel.org/> 编译内核 --------- - - 确保您至少有gcc 4.9可用。 + - 确保您至少有gcc 5.1可用。 有关更多信息,请参阅 :ref:`Documentation/process/changes.rst <changes>` 。 请注意,您仍然可以使用此内核运行a.out用户程序。 diff --git a/Documentation/translations/zh_TW/admin-guide/README.rst b/Documentation/translations/zh_TW/admin-guide/README.rst index b752e50359e6..6ce97edbab37 100644 --- a/Documentation/translations/zh_TW/admin-guide/README.rst +++ b/Documentation/translations/zh_TW/admin-guide/README.rst @@ -226,7 +226,7 @@ Linux內核5.x版本 <http://kernel.org/> 編譯內核 --------- - - 確保您至少有gcc 4.9可用。 + - 確保您至少有gcc 5.1可用。 有關更多信息,請參閱 :ref:`Documentation/process/changes.rst <changes>` 。 請注意,您仍然可以使用此內核運行a.out用戶程序。 diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 0b5eefed027e..c432be070f67 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -27,6 +27,7 @@ place where this information is gathered. iommu media/index sysfs-platform_profile + vduse .. only:: subproject and html diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index b7070d76f076..2e8134059c87 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -299,6 +299,7 @@ Code Seq# Include File Comments 'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict! '|' 00-7F linux/media.h 0x80 00-1F linux/fb.h +0x81 00-1F linux/vduse.h 0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range diff --git a/Documentation/userspace-api/vduse.rst b/Documentation/userspace-api/vduse.rst new file mode 100644 index 000000000000..42ef59ea5314 --- /dev/null +++ b/Documentation/userspace-api/vduse.rst @@ -0,0 +1,233 @@ +================================== +VDUSE - "vDPA Device in Userspace" +================================== + +vDPA (virtio data path acceleration) device is a device that uses a +datapath which complies with the virtio specifications with vendor +specific control path. vDPA devices can be both physically located on +the hardware or emulated by software. VDUSE is a framework that makes it +possible to implement software-emulated vDPA devices in userspace. And +to make the device emulation more secure, the emulated vDPA device's +control path is handled in the kernel and only the data path is +implemented in the userspace. + +Note that only virtio block device is supported by VDUSE framework now, +which can reduce security risks when the userspace process that implements +the data path is run by an unprivileged user. The support for other device +types can be added after the security issue of corresponding device driver +is clarified or fixed in the future. + +Create/Destroy VDUSE devices +------------------------ + +VDUSE devices are created as follows: + +1. Create a new VDUSE instance with ioctl(VDUSE_CREATE_DEV) on + /dev/vduse/control. + +2. Setup each virtqueue with ioctl(VDUSE_VQ_SETUP) on /dev/vduse/$NAME. + +3. Begin processing VDUSE messages from /dev/vduse/$NAME. The first + messages will arrive while attaching the VDUSE instance to vDPA bus. + +4. Send the VDPA_CMD_DEV_NEW netlink message to attach the VDUSE + instance to vDPA bus. + +VDUSE devices are destroyed as follows: + +1. Send the VDPA_CMD_DEV_DEL netlink message to detach the VDUSE + instance from vDPA bus. + +2. Close the file descriptor referring to /dev/vduse/$NAME. + +3. Destroy the VDUSE instance with ioctl(VDUSE_DESTROY_DEV) on + /dev/vduse/control. + +The netlink messages can be sent via vdpa tool in iproute2 or use the +below sample codes: + +.. code-block:: c + + static int netlink_add_vduse(const char *name, enum vdpa_command cmd) + { + struct nl_sock *nlsock; + struct nl_msg *msg; + int famid; + + nlsock = nl_socket_alloc(); + if (!nlsock) + return -ENOMEM; + + if (genl_connect(nlsock)) + goto free_sock; + + famid = genl_ctrl_resolve(nlsock, VDPA_GENL_NAME); + if (famid < 0) + goto close_sock; + + msg = nlmsg_alloc(); + if (!msg) + goto close_sock; + + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, famid, 0, 0, cmd, 0)) + goto nla_put_failure; + + NLA_PUT_STRING(msg, VDPA_ATTR_DEV_NAME, name); + if (cmd == VDPA_CMD_DEV_NEW) + NLA_PUT_STRING(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vduse"); + + if (nl_send_sync(nlsock, msg)) + goto close_sock; + + nl_close(nlsock); + nl_socket_free(nlsock); + + return 0; + nla_put_failure: + nlmsg_free(msg); + close_sock: + nl_close(nlsock); + free_sock: + nl_socket_free(nlsock); + return -1; + } + +How VDUSE works +--------------- + +As mentioned above, a VDUSE device is created by ioctl(VDUSE_CREATE_DEV) on +/dev/vduse/control. With this ioctl, userspace can specify some basic configuration +such as device name (uniquely identify a VDUSE device), virtio features, virtio +configuration space, the number of virtqueues and so on for this emulated device. +Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device +emulation. Userspace can use the VDUSE_VQ_SETUP ioctl on /dev/vduse/$NAME to +add per-virtqueue configuration such as the max size of virtqueue to the device. + +After the initialization, the VDUSE device can be attached to vDPA bus via +the VDPA_CMD_DEV_NEW netlink message. Userspace needs to read()/write() on +/dev/vduse/$NAME to receive/reply some control messages from/to VDUSE kernel +module as follows: + +.. code-block:: c + + static int vduse_message_handler(int dev_fd) + { + int len; + struct vduse_dev_request req; + struct vduse_dev_response resp; + + len = read(dev_fd, &req, sizeof(req)); + if (len != sizeof(req)) + return -1; + + resp.request_id = req.request_id; + + switch (req.type) { + + /* handle different types of messages */ + + } + + len = write(dev_fd, &resp, sizeof(resp)); + if (len != sizeof(resp)) + return -1; + + return 0; + } + +There are now three types of messages introduced by VDUSE framework: + +- VDUSE_GET_VQ_STATE: Get the state for virtqueue, userspace should return + avail index for split virtqueue or the device/driver ring wrap counters and + the avail and used index for packed virtqueue. + +- VDUSE_SET_STATUS: Set the device status, userspace should follow + the virtio spec: https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html + to process this message. For example, fail to set the FEATURES_OK device + status bit if the device can not accept the negotiated virtio features + get from the VDUSE_DEV_GET_FEATURES ioctl. + +- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for specified + IOVA range, userspace should firstly remove the old mapping, then setup the new + mapping via the VDUSE_IOTLB_GET_FD ioctl. + +After DRIVER_OK status bit is set via the VDUSE_SET_STATUS message, userspace is +able to start the dataplane processing as follows: + +1. Get the specified virtqueue's information with the VDUSE_VQ_GET_INFO ioctl, + including the size, the IOVAs of descriptor table, available ring and used ring, + the state and the ready status. + +2. Pass the above IOVAs to the VDUSE_IOTLB_GET_FD ioctl so that those IOVA regions + can be mapped into userspace. Some sample codes is shown below: + +.. code-block:: c + + static int perm_to_prot(uint8_t perm) + { + int prot = 0; + + switch (perm) { + case VDUSE_ACCESS_WO: + prot |= PROT_WRITE; + break; + case VDUSE_ACCESS_RO: + prot |= PROT_READ; + break; + case VDUSE_ACCESS_RW: + prot |= PROT_READ | PROT_WRITE; + break; + } + + return prot; + } + + static void *iova_to_va(int dev_fd, uint64_t iova, uint64_t *len) + { + int fd; + void *addr; + size_t size; + struct vduse_iotlb_entry entry; + + entry.start = iova; + entry.last = iova; + + /* + * Find the first IOVA region that overlaps with the specified + * range [start, last] and return the corresponding file descriptor. + */ + fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry); + if (fd < 0) + return NULL; + + size = entry.last - entry.start + 1; + *len = entry.last - iova + 1; + addr = mmap(0, size, perm_to_prot(entry.perm), MAP_SHARED, + fd, entry.offset); + close(fd); + if (addr == MAP_FAILED) + return NULL; + + /* + * Using some data structures such as linked list to store + * the iotlb mapping. The munmap(2) should be called for the + * cached mapping when the corresponding VDUSE_UPDATE_IOTLB + * message is received or the device is reset. + */ + + return addr + iova - entry.start; + } + +3. Setup the kick eventfd for the specified virtqueues with the VDUSE_VQ_SETUP_KICKFD + ioctl. The kick eventfd is used by VDUSE kernel module to notify userspace to + consume the available ring. This is optional since userspace can choose to poll the + available ring instead. + +4. Listen to the kick eventfd (optional) and consume the available ring. The buffer + described by the descriptors in the descriptor table should be also mapped into + userspace via the VDUSE_IOTLB_GET_FD ioctl before accessing. + +5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl + after the used ring is filled. + +For more details on the uAPI, please see include/uapi/linux/vduse.h. diff --git a/MAINTAINERS b/MAINTAINERS index 5ec52be126f8..ca6d6fde85cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -333,7 +333,7 @@ S: Maintained F: drivers/platform/x86/acer-wmi.c ACPI -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <lenb@kernel.org> L: linux-acpi@vger.kernel.org S: Supported @@ -354,7 +354,7 @@ F: include/linux/fwnode.h F: tools/power/acpi/ ACPI APEI -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <lenb@kernel.org> R: James Morse <james.morse@arm.com> R: Tony Luck <tony.luck@intel.com> @@ -364,7 +364,6 @@ F: drivers/acpi/apei/ ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore <robert.moore@intel.com> -M: Erik Kaneda <erik.kaneda@intel.com> M: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com> L: linux-acpi@vger.kernel.org L: devel@acpica.org @@ -403,7 +402,7 @@ S: Maintained F: drivers/platform/x86/i2c-multi-instantiate.c ACPI PMIC DRIVERS -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <lenb@kernel.org> R: Andy Shevchenko <andy@kernel.org> R: Mika Westerberg <mika.westerberg@linux.intel.com> @@ -3314,7 +3313,6 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: block/ F: drivers/block/ -F: fs/block_dev.c F: include/linux/blk* F: kernel/trace/blktrace.c F: lib/sbitmap.c @@ -4686,6 +4684,7 @@ F: drivers/platform/x86/compal-laptop.c COMPILER ATTRIBUTES M: Miguel Ojeda <ojeda@kernel.org> +R: Nick Desaulniers <ndesaulniers@google.com> S: Maintained F: include/linux/compiler_attributes.h @@ -4827,7 +4826,7 @@ W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php F: drivers/cpufreq/vexpress-spc-cpufreq.c CPU FREQUENCY SCALING FRAMEWORK -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Viresh Kumar <viresh.kumar@linaro.org> L: linux-pm@vger.kernel.org S: Maintained @@ -4845,7 +4844,7 @@ F: kernel/sched/cpufreq*.c F: tools/testing/selftests/cpufreq/ CPU IDLE TIME MANAGEMENT FRAMEWORK -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Daniel Lezcano <daniel.lezcano@linaro.org> L: linux-pm@vger.kernel.org S: Maintained @@ -7591,7 +7590,7 @@ W: ftp://ftp.openlinux.org/pub/people/hch/vxfs F: fs/freevxfs/ FREEZER -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Pavel Machek <pavel@ucw.cz> L: linux-pm@vger.kernel.org S: Supported @@ -7844,7 +7843,7 @@ S: Supported F: drivers/i2c/muxes/i2c-demux-pinctrl.c GENERIC PM DOMAINS -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Kevin Hilman <khilman@kernel.org> M: Ulf Hansson <ulf.hansson@linaro.org> L: linux-pm@vger.kernel.org @@ -8310,7 +8309,7 @@ W: http://drama.obuda.kando.hu/~fero/cgi-bin/hgafb.shtml F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Pavel Machek <pavel@ucw.cz> L: linux-pm@vger.kernel.org S: Supported @@ -10623,10 +10622,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: drivers/ata/sata_promise.* LIBATA SUBSYSTEM (Serial and Parallel ATA drivers) -M: Jens Axboe <axboe@kernel.dk> +M: Damien Le Moal <damien.lemoal@opensource.wdc.com> L: linux-ide@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git F: Documentation/devicetree/bindings/ata/ F: drivers/ata/ F: include/linux/ata.h @@ -13410,7 +13409,7 @@ F: include/linux/nvme-fc.h NVM EXPRESS TARGET DRIVER M: Christoph Hellwig <hch@lst.de> M: Sagi Grimberg <sagi@grimberg.me> -M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com> +M: Chaitanya Kulkarni <kch@nvidia.com> L: linux-nvme@lists.infradead.org S: Supported W: http://git.infradead.org/nvme.git @@ -14343,7 +14342,8 @@ F: Documentation/devicetree/bindings/pci/intel,ixp4xx-pci.yaml F: drivers/pci/controller/pci-ixp4xx.c PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD) -M: Jonathan Derrick <jonathan.derrick@intel.com> +M: Nirmal Patel <nirmal.patel@linux.intel.com> +R: Jonathan Derrick <jonathan.derrick@linux.dev> L: linux-pci@vger.kernel.org S: Supported F: drivers/pci/controller/vmd.c @@ -14969,7 +14969,7 @@ F: kernel/time/*timer* F: kernel/time/namespace.c POWER MANAGEMENT CORE -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -17947,7 +17947,7 @@ F: arch/sh/ F: drivers/sh/ SUSPEND TO RAM -M: "Rafael J. Wysocki" <rjw@rjwysocki.net> +M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <len.brown@intel.com> M: Pavel Machek <pavel@ucw.cz> L: linux-pm@vger.kernel.org @@ -18567,6 +18567,7 @@ F: drivers/thermal/ F: include/linux/cpu_cooling.h F: include/linux/thermal.h F: include/uapi/linux/thermal.h +F: tools/thermal/ THERMAL DRIVER FOR AMLOGIC SOCS M: Guillaume La Roque <glaroque@baylibre.com> @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc2 NAME = Opossums on Parade # *DOCUMENTATION* @@ -849,12 +849,6 @@ endif DEBUG_CFLAGS := -# Workaround for GCC versions < 5.0 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 -ifdef CONFIG_CC_IS_GCC -DEBUG_CFLAGS += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) -endif - ifdef CONFIG_DEBUG_INFO ifdef CONFIG_DEBUG_INFO_SPLIT diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 02e5b673bdad..4e87783c90ad 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -20,7 +20,7 @@ config ALPHA select NEED_SG_DMA_LENGTH select VIRT_TO_BUS select GENERIC_IRQ_PROBE - select GENERIC_PCI_IOMAP if PCI + select GENERIC_PCI_IOMAP select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION @@ -199,7 +199,6 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" - depends on BROKEN select HAVE_EISA help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one diff --git a/arch/alpha/include/asm/asm-prototypes.h b/arch/alpha/include/asm/asm-prototypes.h index b34cc1f06720..c8ae46fc2e74 100644 --- a/arch/alpha/include/asm/asm-prototypes.h +++ b/arch/alpha/include/asm/asm-prototypes.h @@ -16,3 +16,4 @@ extern void __divlu(void); extern void __remlu(void); extern void __divqu(void); extern void __remqu(void); +extern unsigned long __udiv_qrnnd(unsigned long *, unsigned long, unsigned long , unsigned long); diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 0fab5ac90775..c9cb554fbe54 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -60,7 +60,7 @@ extern inline void set_hae(unsigned long new_hae) * Change virtual addresses to physical addresses and vv. */ #ifdef USE_48_BIT_KSEG -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { return (unsigned long)address - IDENT_ADDR; } @@ -70,7 +70,7 @@ static inline void * phys_to_virt(unsigned long address) return (void *) (address + IDENT_ADDR); } #else -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { unsigned long phys = (unsigned long)address; @@ -106,7 +106,7 @@ static inline void * phys_to_virt(unsigned long address) extern unsigned long __direct_map_base; extern unsigned long __direct_map_size; -static inline unsigned long __deprecated virt_to_bus(void *address) +static inline unsigned long __deprecated virt_to_bus(volatile void *address) { unsigned long phys = virt_to_phys(address); unsigned long bus = phys + __direct_map_base; diff --git a/arch/alpha/include/asm/jensen.h b/arch/alpha/include/asm/jensen.h index 916895155a88..1c4131453db2 100644 --- a/arch/alpha/include/asm/jensen.h +++ b/arch/alpha/include/asm/jensen.h @@ -111,18 +111,18 @@ __EXTERN_INLINE void jensen_set_hae(unsigned long addr) * convinced that I need one of the newer machines. */ -static inline unsigned int jensen_local_inb(unsigned long addr) +__EXTERN_INLINE unsigned int jensen_local_inb(unsigned long addr) { return 0xff & *(vuip)((addr << 9) + EISA_VL82C106); } -static inline void jensen_local_outb(u8 b, unsigned long addr) +__EXTERN_INLINE void jensen_local_outb(u8 b, unsigned long addr) { *(vuip)((addr << 9) + EISA_VL82C106) = b; mb(); } -static inline unsigned int jensen_bus_inb(unsigned long addr) +__EXTERN_INLINE unsigned int jensen_bus_inb(unsigned long addr) { long result; @@ -131,7 +131,7 @@ static inline unsigned int jensen_bus_inb(unsigned long addr) return __kernel_extbl(result, addr & 3); } -static inline void jensen_bus_outb(u8 b, unsigned long addr) +__EXTERN_INLINE void jensen_bus_outb(u8 b, unsigned long addr) { jensen_set_hae(0); *(vuip)((addr << 7) + EISA_IO + 0x00) = b * 0x01010101; diff --git a/arch/alpha/include/asm/setup.h b/arch/alpha/include/asm/setup.h new file mode 100644 index 000000000000..262aab99e391 --- /dev/null +++ b/arch/alpha/include/asm/setup.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ALPHA_SETUP_H +#define __ALPHA_SETUP_H + +#include <uapi/asm/setup.h> + +/* + * We leave one page for the initial stack page, and one page for + * the initial process structure. Also, the console eats 3 MB for + * the initial bootloader (one of which we can reclaim later). + */ +#define BOOT_PCB 0x20000000 +#define BOOT_ADDR 0x20000000 +/* Remove when official MILO sources have ELF support: */ +#define BOOT_SIZE (16*1024) + +#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS +#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ +#else +#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ +#endif + +#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) +#define SWAPPER_PGD KERNEL_START +#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) +#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) +#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) +#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) + +#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) + +/* + * This is setup by the secondary bootstrap loader. Because + * the zero page is zeroed out as soon as the vm system is + * initialized, we need to copy things out into a more permanent + * place. + */ +#define PARAM ZERO_PGE +#define COMMAND_LINE ((char *)(absolute_pointer(PARAM + 0x0000))) +#define INITRD_START (*(unsigned long *) (PARAM+0x100)) +#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) + +#endif diff --git a/arch/alpha/include/uapi/asm/setup.h b/arch/alpha/include/uapi/asm/setup.h index 13b7ee465b0e..f881ea5947cb 100644 --- a/arch/alpha/include/uapi/asm/setup.h +++ b/arch/alpha/include/uapi/asm/setup.h @@ -1,43 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ALPHA_SETUP_H -#define __ALPHA_SETUP_H +#ifndef _UAPI__ALPHA_SETUP_H +#define _UAPI__ALPHA_SETUP_H #define COMMAND_LINE_SIZE 256 -/* - * We leave one page for the initial stack page, and one page for - * the initial process structure. Also, the console eats 3 MB for - * the initial bootloader (one of which we can reclaim later). - */ -#define BOOT_PCB 0x20000000 -#define BOOT_ADDR 0x20000000 -/* Remove when official MILO sources have ELF support: */ -#define BOOT_SIZE (16*1024) - -#ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS -#define KERNEL_START_PHYS 0x300000 /* Old bootloaders hardcoded this. */ -#else -#define KERNEL_START_PHYS 0x1000000 /* required: Wildfire/Titan/Marvel */ -#endif - -#define KERNEL_START (PAGE_OFFSET+KERNEL_START_PHYS) -#define SWAPPER_PGD KERNEL_START -#define INIT_STACK (PAGE_OFFSET+KERNEL_START_PHYS+0x02000) -#define EMPTY_PGT (PAGE_OFFSET+KERNEL_START_PHYS+0x04000) -#define EMPTY_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x08000) -#define ZERO_PGE (PAGE_OFFSET+KERNEL_START_PHYS+0x0A000) - -#define START_ADDR (PAGE_OFFSET+KERNEL_START_PHYS+0x10000) - -/* - * This is setup by the secondary bootstrap loader. Because - * the zero page is zeroed out as soon as the vm system is - * initialized, we need to copy things out into a more permanent - * place. - */ -#define PARAM ZERO_PGE -#define COMMAND_LINE ((char*)(PARAM + 0x0000)) -#define INITRD_START (*(unsigned long *) (PARAM+0x100)) -#define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) - -#endif +#endif /* _UAPI__ALPHA_SETUP_H */ diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index e5d870ff225f..5c9c88428124 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -7,6 +7,11 @@ * * Code supporting the Jensen. */ +#define __EXTERN_INLINE +#include <asm/io.h> +#include <asm/jensen.h> +#undef __EXTERN_INLINE + #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/types.h> @@ -17,11 +22,6 @@ #include <asm/ptrace.h> -#define __EXTERN_INLINE inline -#include <asm/io.h> -#include <asm/jensen.h> -#undef __EXTERN_INLINE - #include <asm/dma.h> #include <asm/irq.h> #include <asm/mmu_context.h> diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 854d5e79979e..1cc74f7b50ef 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -14,6 +14,7 @@ ev6-$(CONFIG_ALPHA_EV6) := ev6- ev67-$(CONFIG_ALPHA_EV67) := ev67- lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ + udiv-qrnnd.o \ udelay.o \ $(ev6-y)memset.o \ $(ev6-y)memcpy.o \ diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/lib/udiv-qrnnd.S index d6373ec1bff9..b887aa5428e5 100644 --- a/arch/alpha/math-emu/qrnnd.S +++ b/arch/alpha/lib/udiv-qrnnd.S @@ -25,6 +25,7 @@ # along with GCC; see the file COPYING. If not, write to the # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, # MA 02111-1307, USA. +#include <asm/export.h> .set noreorder .set noat @@ -161,3 +162,4 @@ $Odd: ret $31,($26),1 .end __udiv_qrnnd +EXPORT_SYMBOL(__udiv_qrnnd) diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile index 6eda0973e183..3206402a8792 100644 --- a/arch/alpha/math-emu/Makefile +++ b/arch/alpha/math-emu/Makefile @@ -7,4 +7,4 @@ ccflags-y := -w obj-$(CONFIG_MATHEMU) += math-emu.o -math-emu-objs := math.o qrnnd.o +math-emu-objs := math.o diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index f7cef66af88d..4212258f3cfd 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -403,5 +403,3 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } - -EXPORT_SYMBOL(__udiv_qrnnd); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 077f2ec4eeb2..5c7ae4c3954b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -86,7 +86,7 @@ config ARM64 select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488..587543c6c51c 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5a294f20e9de..ff4962750b3d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -513,7 +513,7 @@ size_t sve_state_size(struct task_struct const *task) void sve_alloc(struct task_struct *task) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(current)); + memset(task->thread.sve_state, 0, sve_state_size(task)); return; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 19100fe8f7e4..40adb8cdbf5a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -18,7 +18,6 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/nospec.h> -#include <linux/sched.h> #include <linux/stddef.h> #include <linux/sysctl.h> #include <linux/unistd.h> @@ -58,7 +57,7 @@ #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> -unsigned long __stack_chk_guard __read_mostly; +unsigned long __stack_chk_guard __ro_after_init; EXPORT_SYMBOL(__stack_chk_guard); #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b16be52233c6..37a81754d9b6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include <linux/crash_dump.h> #include <linux/hugetlb.h> #include <linux/acpi_iort.h> +#include <linux/kmemleak.h> #include <asm/boot.h> #include <asm/fixmap.h> @@ -101,6 +102,11 @@ static void __init reserve_crashkernel(void) pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); + /* + * The crashkernel memory will be removed from the kernel linear + * map. Inform kmemleak so that it won't try to access it. + */ + kmemleak_ignore_phys(crash_base); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; } @@ -222,7 +228,21 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { - const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + + /* + * Corner case: 52-bit VA capable systems running KVM in nVHE mode may + * be limited in their ability to support a linear map that exceeds 51 + * bits of VA space, depending on the placement of the ID map. Given + * that the placement of the ID map may be randomized, let's simply + * limit the kernel's linear map to 51 bits as well if we detect this + * configuration. + */ + if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 && + is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n"); + linear_region_size = min_t(u64, linear_region_size, BIT(51)); + } /* Remove memory above our supported physical address size */ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 911826ea83ce..80eb2396d01e 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -17,21 +17,21 @@ * two accesses to memory, which may be undesirable for some devices. */ #define in_8(addr) \ - ({ u8 __v = (*(__force volatile u8 *) (addr)); __v; }) + ({ u8 __v = (*(__force volatile u8 *) (unsigned long)(addr)); __v; }) #define in_be16(addr) \ - ({ u16 __v = (*(__force volatile u16 *) (addr)); __v; }) + ({ u16 __v = (*(__force volatile u16 *) (unsigned long)(addr)); __v; }) #define in_be32(addr) \ - ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) + ({ u32 __v = (*(__force volatile u32 *) (unsigned long)(addr)); __v; }) #define in_le16(addr) \ - ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (addr)); __v; }) + ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (unsigned long)(addr)); __v; }) #define in_le32(addr) \ - ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (addr)); __v; }) + ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (unsigned long)(addr)); __v; }) -#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b)) -#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w)) -#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l)) -#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (addr)) = cpu_to_le16(w)) -#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (addr)) = cpu_to_le32(l)) +#define out_8(addr,b) (void)((*(__force volatile u8 *) (unsigned long)(addr)) = (b)) +#define out_be16(addr,w) (void)((*(__force volatile u16 *) (unsigned long)(addr)) = (w)) +#define out_be32(addr,l) (void)((*(__force volatile u32 *) (unsigned long)(addr)) = (l)) +#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (unsigned long)(addr)) = cpu_to_le16(w)) +#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (unsigned long)(addr)) = cpu_to_le32(l)) #define raw_inb in_8 #define raw_inw in_be16 diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index e1e90c49a496..dfd6202fd403 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -171,7 +171,6 @@ static int bcd2int (unsigned char b) int mvme147_hwclk(int op, struct rtc_time *t) { -#warning check me! if (!op) { m147_rtc->ctrl = RTC_READ; t->tm_year = bcd2int (m147_rtc->bcd_year); @@ -183,6 +182,9 @@ int mvme147_hwclk(int op, struct rtc_time *t) m147_rtc->ctrl = 0; if (t->tm_year < 70) t->tm_year += 100; + } else { + /* FIXME Setting the time is not yet supported */ + return -EOPNOTSUPP; } return 0; } diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index b59593c7cfb9..b4422c2dfbbf 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -436,7 +436,6 @@ int bcd2int (unsigned char b) int mvme16x_hwclk(int op, struct rtc_time *t) { -#warning check me! if (!op) { rtc->ctrl = RTC_READ; t->tm_year = bcd2int (rtc->bcd_year); @@ -448,6 +447,9 @@ int mvme16x_hwclk(int op, struct rtc_time *t) rtc->ctrl = 0; if (t->tm_year < 70) t->tm_year += 100; + } else { + /* FIXME Setting the time is not yet supported */ + return -EOPNOTSUPP; } return 0; } diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 53d8ea7d36e6..495dd058231d 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -74,7 +74,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) cpumask_set_cpu(cpu1, cpu_map); } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -114,6 +114,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3001a7d8fc76..4742b6f169b7 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,7 +10,6 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_STRNLEN_USER select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE @@ -65,7 +64,6 @@ config PARISC select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS - select SET_FS select TRACE_IRQFLAGS_SUPPORT help diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index dff453687530..9fe54878167d 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -26,7 +26,7 @@ endif OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := -X -e startup --as-needed -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) FORCE $(call if_changed,ld) sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p' @@ -34,7 +34,7 @@ sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\ quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -$(obj)/sizes.h: vmlinux +$(obj)/sizes.h: vmlinux FORCE $(call if_changed,sizes) AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER @@ -70,19 +70,19 @@ suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_XZ) := xz -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE $(call if_changed,ld) diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index d00313d1274e..0561568f7b48 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -184,7 +184,7 @@ extern int npmem_ranges; #include <asm-generic/getorder.h> #include <asm/pdc.h> -#define PAGE0 ((struct zeropage *)__PAGE_OFFSET) +#define PAGE0 ((struct zeropage *)absolute_pointer(__PAGE_OFFSET)) /* DEFINITION OF THE ZERO-PAGE (PAG0) */ /* based on work by Jason Eckhardt (jason@equator.com) */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index b5fbcd2c1780..eeb7da064289 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -101,10 +101,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data); #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) -typedef struct { - int seg; -} mm_segment_t; - #define ARCH_MIN_TASKALIGN 8 struct thread_struct { diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 2b3010ade00e..4b9e3d707571 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,7 +2,7 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 4 +#define SIGRETURN_TRAMP 3 #define SIGRESTARTBLOCK_TRAMP 5 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 0bd38a972cea..00ad50fef769 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -11,7 +11,6 @@ struct thread_info { struct task_struct *task; /* main task structure */ unsigned long flags; /* thread_info flags (see TIF_*) */ - mm_segment_t addr_limit; /* user-level address space limit */ __u32 cpu; /* current CPU */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ }; @@ -21,7 +20,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 7c13314aae4a..192ad9e11b25 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -11,14 +11,6 @@ #include <linux/bug.h> #include <linux/string.h> -#define KERNEL_DS ((mm_segment_t){0}) -#define USER_DS ((mm_segment_t){1}) - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) - -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - /* * Note that since kernel addresses are in a separate address space on * parisc, we don't need to do anything for access_ok(). @@ -33,11 +25,11 @@ #define get_user __get_user #if !defined(CONFIG_64BIT) -#define LDD_USER(val, ptr) __get_user_asm64(val, ptr) -#define STD_USER(x, ptr) __put_user_asm64(x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm64(sr, val, ptr) +#define STD_USER(sr, x, ptr) __put_user_asm64(sr, x, ptr) #else -#define LDD_USER(val, ptr) __get_user_asm(val, "ldd", ptr) -#define STD_USER(x, ptr) __put_user_asm("std", x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm(sr, val, "ldd", ptr) +#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif /* @@ -67,28 +59,15 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) -/* - * load_sr2() preloads the space register %%sr2 - based on the value of - * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which - * is 0), or with the current value of %%sr3 to access user space (USER_DS) - * memory. The following __get_user_asm() and __put_user_asm() functions have - * %%sr2 hard-coded to access the requested memory. - */ -#define load_sr2() \ - __asm__(" or,= %0,%%r0,%%r0\n\t" \ - " mfsp %%sr3,%0\n\t" \ - " mtsp %0,%%sr2\n\t" \ - : : "r"(get_fs()) : ) - -#define __get_user_internal(val, ptr) \ +#define __get_user_internal(sr, val, ptr) \ ({ \ register long __gu_err __asm__ ("r8") = 0; \ \ switch (sizeof(*(ptr))) { \ - case 1: __get_user_asm(val, "ldb", ptr); break; \ - case 2: __get_user_asm(val, "ldh", ptr); break; \ - case 4: __get_user_asm(val, "ldw", ptr); break; \ - case 8: LDD_USER(val, ptr); break; \ + case 1: __get_user_asm(sr, val, "ldb", ptr); break; \ + case 2: __get_user_asm(sr, val, "ldh", ptr); break; \ + case 4: __get_user_asm(sr, val, "ldw", ptr); break; \ + case 8: LDD_USER(sr, val, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -97,15 +76,14 @@ struct exception_table_entry { #define __get_user(val, ptr) \ ({ \ - load_sr2(); \ - __get_user_internal(val, ptr); \ + __get_user_internal("%%sr3,", val, ptr); \ }) -#define __get_user_asm(val, ldx, ptr) \ +#define __get_user_asm(sr, val, ldx, ptr) \ { \ register long __gu_val; \ \ - __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ + __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ @@ -114,9 +92,22 @@ struct exception_table_entry { (val) = (__force __typeof__(*(ptr))) __gu_val; \ } +#define HAVE_GET_KERNEL_NOFAULT +#define __get_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z; \ + long __err; \ + __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \ + if (unlikely(__err)) \ + goto err_label; \ + else \ + *(type *)(dst) = __z; \ +} + + #if !defined(CONFIG_64BIT) -#define __get_user_asm64(val, ptr) \ +#define __get_user_asm64(sr, val, ptr) \ { \ union { \ unsigned long long l; \ @@ -124,8 +115,8 @@ struct exception_table_entry { } __gu_tmp; \ \ __asm__(" copy %%r0,%R0\n" \ - "1: ldw 0(%%sr2,%2),%0\n" \ - "2: ldw 4(%%sr2,%2),%R0\n" \ + "1: ldw 0(" sr "%2),%0\n" \ + "2: ldw 4(" sr "%2),%R0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ @@ -138,16 +129,16 @@ struct exception_table_entry { #endif /* !defined(CONFIG_64BIT) */ -#define __put_user_internal(x, ptr) \ +#define __put_user_internal(sr, x, ptr) \ ({ \ register long __pu_err __asm__ ("r8") = 0; \ __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm("stb", __x, ptr); break; \ - case 2: __put_user_asm("sth", __x, ptr); break; \ - case 4: __put_user_asm("stw", __x, ptr); break; \ - case 8: STD_USER(__x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ + case 8: STD_USER(sr, __x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -156,10 +147,20 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - load_sr2(); \ - __put_user_internal(x, ptr); \ + __put_user_internal("%%sr3,", x, ptr); \ }) +#define __put_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z = *(type *)(src); \ + long __err; \ + __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \ + if (unlikely(__err)) \ + goto err_label; \ +} + + + /* * The "__put_user/kernel_asm()" macros tell gcc they read from memory @@ -170,26 +171,26 @@ struct exception_table_entry { * r8 is already listed as err. */ -#define __put_user_asm(stx, x, ptr) \ - __asm__ __volatile__ ( \ - "1: " stx " %2,0(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ +#define __put_user_asm(sr, stx, x, ptr) \ + __asm__ __volatile__ ( \ + "1: " stx " %2,0(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + : "=r"(__pu_err) \ : "r"(ptr), "r"(x), "0"(__pu_err)) #if !defined(CONFIG_64BIT) -#define __put_user_asm64(__val, ptr) do { \ - __asm__ __volatile__ ( \ - "1: stw %2,0(%%sr2,%1)\n" \ - "2: stw %R2,4(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ +#define __put_user_asm64(sr, __val, ptr) do { \ + __asm__ __volatile__ ( \ + "1: stw %2,0(" sr "%1)\n" \ + "2: stw %R2,4(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ @@ -200,14 +201,12 @@ struct exception_table_entry { */ extern long strncpy_from_user(char *, const char __user *, long); -extern unsigned lclear_user(void __user *, unsigned long); -extern long lstrnlen_user(const char __user *, long); +extern __must_check unsigned lclear_user(void __user *, unsigned long); +extern __must_check long strnlen_user(const char __user *src, long n); /* * Complex access routines -- macros */ -#define user_addr_max() (~0UL) -#define strnlen_user lstrnlen_user #define clear_user lclear_user #define __clear_user lclear_user diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 33113ba24054..22924a3f1728 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -230,7 +230,6 @@ int main(void) DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); DEFINE(THREAD_SZ, sizeof(struct thread_info)); /* THREAD_SZ_ALGN includes space for a stack frame. */ diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index e8a6a751dfd8..00297e8e1c88 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(__xchg64); #include <linux/uaccess.h> EXPORT_SYMBOL(lclear_user); -EXPORT_SYMBOL(lstrnlen_user); #ifndef CONFIG_64BIT /* Needed so insmod can set dp value */ diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 3fb86ee507dd..cceb09855e03 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -150,8 +150,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_PA11 dma_ops_init(); #endif - - clear_sched_clock_stable(); } /* diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index db1a47cf424d..bbfe23c40c01 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -237,18 +237,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); + sigframe_size = PARISC_RT_SIGFRAME_SIZE; #ifdef CONFIG_64BIT if (is_compat_task()) { /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ usp = (compat_uint_t)usp; + sigframe_size = PARISC_RT_SIGFRAME_SIZE32; } #endif - /*FIXME: frame_size parameter is unused, remove it. */ - frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); + frame = get_sigframe(&ksig->ka, usp, sigframe_size); DBG(1,"SETUP_RT_FRAME: START\n"); DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info); + start = (unsigned long) frame; + if (start >= user_addr_max() - sigframe_size) + return -EFAULT; #ifdef CONFIG_64BIT @@ -284,32 +288,21 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, already in userspace. The first words of tramp are used to save the previous sigrestartblock trampoline that might be on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP+X. */ + SIGRESTARTBLOCK_TRAMP. */ err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0, &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]); - err |= __put_user(INSN_LDI_R20, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); err |= __put_user(INSN_BLE_SR2_R0, + &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); + err |= __put_user(INSN_LDI_R20, &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); - err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); - -#if DEBUG_SIG - /* Assert that we're flushing in the correct space... */ - { - unsigned long sid; - asm ("mfsp %%sr3,%0" : "=r" (sid)); - DBG(1,"setup_rt_frame: Flushing 64 bytes at space %#x offset %p\n", - sid, frame->tramp); - } -#endif - start = (unsigned long) &frame->tramp[0]; - end = (unsigned long) &frame->tramp[TRAMP_SIZE]; + start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]; + end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]; flush_user_dcache_range_asm(start, end); flush_user_icache_range_asm(start, end); /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP - * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP + * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP */ rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP]; @@ -353,11 +346,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, /* The syscall return path will create IAOQ values from r31. */ - sigframe_size = PARISC_RT_SIGFRAME_SIZE; -#ifdef CONFIG_64BIT - if (is_compat_task()) - sigframe_size = PARISC_RT_SIGFRAME_SIZE32; -#endif if (in_syscall) { regs->gr[31] = haddr; #ifdef CONFIG_64BIT @@ -501,7 +489,6 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) DBG(1,"ERESTARTNOHAND: returning -EINTR\n"); regs->gr[28] = -EINTR; break; - case -ERESTARTSYS: if (!(ka->sa.sa_flags & SA_RESTART)) { DBG(1,"ERESTARTSYS: putting -EINTR\n"); @@ -529,6 +516,10 @@ insert_restart_trampoline(struct pt_regs *regs) unsigned long end = (unsigned long) &usp[5]; long err = 0; + /* check that we don't exceed the stack */ + if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int)) + return; + /* Setup a trampoline to restart the syscall * with __NR_restart_syscall * @@ -569,10 +560,6 @@ insert_restart_trampoline(struct pt_regs *regs) } /* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - * * We need to be able to restore the syscall arguments (r21-r26) to * restart syscalls. Thus, the syscall path should save them in the * pt_regs structure (it's okay to do so since they are caller-save diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index f166250f2d06..a5bdbb5678b7 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -36,7 +36,7 @@ struct compat_regfile { compat_int_t rf_sar; }; -#define COMPAT_SIGRETURN_TRAMP 4 +#define COMPAT_SIGRETURN_TRAMP 3 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \ COMPAT_SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 08e4d480abe1..9fb1e794831b 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -265,6 +265,9 @@ static int __init init_cr16_clocksource(void) (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) continue; + /* mark sched_clock unstable */ + clear_sched_clock_stable(); + clocksource_cr16.name = "cr16_unstable"; clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; clocksource_cr16.rating = 0; @@ -272,10 +275,6 @@ static int __init init_cr16_clocksource(void) } } - /* XXX: We may want to mark sched_clock stable here if cr16 clocks are - * in sync: - * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */ - /* register at clocksource framework */ clocksource_register_hz(&clocksource_cr16, 100 * PAGE0->mem_10msec); diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index f03adb1999e7..367f6397bda7 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -513,12 +513,15 @@ void ioport_unmap(void __iomem *addr) } } +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { if (!INDIRECT_ADDR(addr)) { iounmap(addr); } } +EXPORT_SYMBOL(pci_iounmap); +#endif EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -544,4 +547,3 @@ EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 36d6a8638ead..b428d29e45fb 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -28,21 +28,6 @@ #include <linux/linkage.h> /* - * get_sr gets the appropriate space value into - * sr1 for kernel/user space access, depending - * on the flag stored in the task structure. - */ - - .macro get_sr - mfctl %cr30,%r1 - ldw TI_SEGMENT(%r1),%r22 - mfsp %sr3,%r1 - or,<> %r22,%r0,%r0 - copy %r0,%r1 - mtsp %r1,%sr1 - .endm - - /* * unsigned long lclear_user(void *to, unsigned long n) * * Returns 0 for success. @@ -51,10 +36,9 @@ ENTRY_CFI(lclear_user) comib,=,n 0,%r25,$lclu_done - get_sr $lclu_loop: addib,<> -1,%r25,$lclu_loop -1: stbs,ma %r0,1(%sr1,%r26) +1: stbs,ma %r0,1(%sr3,%r26) $lclu_done: bv %r0(%r2) @@ -67,40 +51,6 @@ $lclu_done: ENDPROC_CFI(lclear_user) - /* - * long lstrnlen_user(char *s, long n) - * - * Returns 0 if exception before zero byte or reaching N, - * N+1 if N would be exceeded, - * else strlen + 1 (i.e. includes zero byte). - */ - -ENTRY_CFI(lstrnlen_user) - comib,= 0,%r25,$lslen_nzero - copy %r26,%r24 - get_sr -1: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_loop: - comib,=,n 0,%r1,$lslen_done - addib,<> -1,%r25,$lslen_loop -2: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_done: - bv %r0(%r2) - sub %r26,%r24,%r28 - -$lslen_nzero: - b $lslen_done - ldo 1(%r26),%r26 /* special case for N == 0 */ - -3: b $lslen_done - copy %r24,%r26 /* reset r26 so 0 is returned on fault */ - - ASM_EXCEPTIONTABLE_ENTRY(1b,3b) - ASM_EXCEPTIONTABLE_ENTRY(2b,3b) - -ENDPROC_CFI(lstrnlen_user) - - /* * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) * diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 6900d0ac2421..089ee3ea55c8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -35,7 +35,6 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -include $(srctree)/include/linux/compiler_attributes.h \ $(LINUXINCLUDE) ifdef CONFIG_PPC64_BOOT_WRAPPER @@ -70,6 +69,7 @@ ifeq ($(call cc-option-yn, -fstack-protector),y) BOOTCFLAGS += -fno-stack-protector endif +BOOTCFLAGS += -include $(srctree)/include/linux/compiler_attributes.h BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj) DTC_FLAGS ?= -p 1024 diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h index 0ce2368bd20f..dbfa5e1e3198 100644 --- a/arch/powerpc/include/asm/asm-const.h +++ b/arch/powerpc/include/asm/asm-const.h @@ -12,16 +12,6 @@ # define ASM_CONST(x) __ASM_CONST(x) #endif -/* - * Inline assembly memory constraint - * - * GCC 4.9 doesn't properly handle pre update memory constraint "m<>" - * - */ -#if defined(GCC_VERSION) && GCC_VERSION < 50000 -#define UPD_CONSTR "" -#else #define UPD_CONSTR "<>" -#endif #endif /* _ASM_POWERPC_ASM_CONST_H */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index a73f3f70a657..de10a2697258 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -18,6 +18,7 @@ #include <asm/switch_to.h> #include <asm/syscall.h> #include <asm/time.h> +#include <asm/tm.h> #include <asm/unistd.h> #if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32) @@ -136,6 +137,48 @@ notrace long system_call_exception(long r3, long r4, long r5, */ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + current_thread_info()->flags |= _TIF_RESTOREALL; + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected. + */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + local_irq_enable(); if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index d4212d2ff0b5..ec950b08a8dc 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -12,7 +12,6 @@ #include <asm/mmu.h> #include <asm/ppc_asm.h> #include <asm/ptrace.h> -#include <asm/tm.h> .section ".toc","aw" SYS_CALL_TABLE: @@ -55,12 +54,6 @@ COMPAT_SYS_CALL_TABLE: .globl system_call_vectored_\name system_call_vectored_\name: _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne tabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif SCV_INTERRUPT_TO_KERNEL mr r10,r1 ld r1,PACAKSAVE(r13) @@ -247,12 +240,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_common_real) .globl system_call_common system_call_common: _ASM_NOKPROBE_SYMBOL(system_call_common) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne tabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) @@ -425,34 +412,6 @@ SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b) RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -tabort_syscall: -_ASM_NOKPROBE_SYMBOL(tabort_syscall) - /* Firstly we need to enable TM in the kernel */ - mfmsr r10 - li r9, 1 - rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r10, 0 - - /* tabort, this dooms the transaction, nothing else */ - li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R9) - - /* - * Return directly to userspace. We have corrupted user register state, - * but userspace will never see that register state. Execution will - * resume after the tbegin of the aborted transaction with the - * checkpointed register state. - */ - li r9, MSR_RI - andc r10, r10, r9 - mtmsrd r10, 1 - mtspr SPRN_SRR0, r11 - mtspr SPRN_SRR1, r12 - RFI_TO_USER - b . /* prevent speculative execution */ -#endif - /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not * touched, no exit work created, then this can be used. diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 47a683cd00d2..fd829f7f25a4 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -249,6 +249,7 @@ void machine_check_queue_event(void) { int index; struct machine_check_event evt; + unsigned long msr; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; @@ -262,8 +263,20 @@ void machine_check_queue_event(void) memcpy(&local_paca->mce_info->mce_event_queue[index], &evt, sizeof(evt)); - /* Queue irq work to process this event later. */ - irq_work_queue(&mce_event_process_work); + /* + * Queue irq work to process this event later. Before + * queuing the work enable translation for non radix LPAR, + * as irq_work_queue may try to access memory outside RMO + * region. + */ + if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { + msr = mfmsr(); + mtmsr(msr | MSR_IR | MSR_DR); + irq_work_queue(&mce_event_process_work); + mtmsr(msr); + } else { + irq_work_queue(&mce_event_process_work); + } } void mce_common_process_ue(struct pt_regs *regs, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 75079397c2a5..90484425a1e6 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2536,7 +2536,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -PPC_MIN_STKFRM(r1) + stdu r1, -TM_FRAME_SIZE(r1) /* Turn on TM. */ mfmsr r8 @@ -2551,10 +2551,42 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop + /* + * It's possible that treclaim. may modify registers, if we have lost + * track of fake-suspend state in the guest due to it using rfscv. + * Save and restore registers in case this occurs. + */ + mfspr r3, SPRN_DSCR + mfspr r4, SPRN_XER + mfspr r5, SPRN_AMR + /* SPRN_TAR would need to be saved here if the kernel ever used it */ + mfcr r12 + SAVE_NVGPRS(r1) + SAVE_GPR(2, r1) + SAVE_GPR(3, r1) + SAVE_GPR(4, r1) + SAVE_GPR(5, r1) + stw r12, 8(r1) + std r1, HSTATE_HOST_R1(r13) + /* We have to treclaim here because that's the only way to do S->N */ li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + REST_GPR(2, r1) + REST_GPR(3, r1) + REST_GPR(4, r1) + REST_GPR(5, r1) + lwz r12, 8(r1) + REST_NVGPRS(r1) + mtspr SPRN_DSCR, r3 + mtspr SPRN_XER, r4 + mtspr SPRN_AMR, r5 + mtcr r12 + HMT_MEDIUM + /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since @@ -2582,7 +2614,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM + addi r1, r1, TM_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 5c1a157a83b8..244a727c6ba4 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -348,9 +348,9 @@ static int xics_host_map(struct irq_domain *domain, unsigned int virq, if (xics_ics->check(xics_ics, hwirq)) return -EINVAL; - /* No chip data for the XICS domain */ + /* Let the ICS be the chip data for the XICS domain. For ICS native */ irq_domain_set_info(domain, virq, hwirq, xics_ics->chip, - NULL, handle_fasteoi_irq, NULL, NULL); + xics_ics, handle_fasteoi_irq, NULL, NULL); return 0; } diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c79955655fa4..301a54233c7e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -41,6 +41,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU + select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK @@ -235,7 +236,7 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" select 64BIT - select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 01906a90c501..0eb4568fbd29 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -132,8 +132,11 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -zinstall install: - $(Q)$(MAKE) $(build)=$(boot) $@ +install: install-image = Image +zinstall: install-image = Image.gz +install zinstall: + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ + $(boot)/$(install-image) System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 6bf299f70c27..becd0621071c 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -58,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) - -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image System.map "$(INSTALL_PATH)" - -zinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image.gz System.map "$(INSTALL_PATH)" diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index baea7d204639..b254c60589a1 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -16,10 +16,14 @@ aliases { ethernet0 = &emac1; + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; }; chosen { - stdout-path = &serial0; + stdout-path = "serial0:115200n8"; }; cpus { diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index bc68231a8fb7..4ebc80315f01 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -39,10 +39,12 @@ CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PCIE_XILINX=y +CONFIG_PCIE_FU740=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_NVME=m CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_SCSI_VIRTIO=y @@ -108,6 +110,8 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=m CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index f4b490cd0e5d..f53c40026c7a 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -42,6 +42,9 @@ */ #define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#ifdef CONFIG_64BIT +#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#endif /* * This yields a mask that user programs can use to figure out what * instruction set this CPU supports. This could be done in user space, diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index d86781357044..90deabfe63ea 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf, } } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index af776555ded9..9c9f35091ef0 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -121,7 +121,6 @@ SECTIONS } BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) - EXCEPTION_TABLE(0x10) .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { *(.rel.dyn*) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 502d0826ecb1..5104f3a871e3 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,6 +4,8 @@ * Copyright (C) 2017 SiFive */ +#define RO_EXCEPTION_TABLE_ALIGN 16 + #ifdef CONFIG_XIP_KERNEL #include "vmlinux-xip.lds.S" #else @@ -112,8 +114,6 @@ SECTIONS *(.srodata*) } - EXCEPTION_TABLE(0x10) - . = ALIGN(SECTION_ALIGN); _data = .; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2bd90c51efd3..b86de61b8caa 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -685,16 +685,6 @@ config STACK_GUARD The minimum size for the stack guard should be 256 for 31 bit and 512 for 64 bit. -config WARN_DYNAMIC_STACK - def_bool n - prompt "Emit compiler warnings for function with dynamic stack usage" - help - This option enables the compiler option -mwarn-dynamicstack. If the - compiler supports this options generates warnings for functions - that dynamically allocate stack space using alloca. - - Say N if you are unsure. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index a3cf33ad009f..450b351dfa8e 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -85,13 +85,6 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifdef CONFIG_WARN_DYNAMIC_STACK - ifneq ($(call cc-option,-mwarn-dynamicstack),) - KBUILD_CFLAGS += -mwarn-dynamicstack - KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack - endif -endif - ifdef CONFIG_EXPOLINE ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) CC_FLAGS_EXPOLINE := -mindirect-branch=thunk diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 37b6115ed80e..6aad18ee131d 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -10,6 +10,7 @@ CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y CONFIG_PREEMPT=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -503,6 +504,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -661,7 +663,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y @@ -720,6 +721,8 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m @@ -774,7 +777,6 @@ CONFIG_RANDOM32_SELFTEST=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 CONFIG_DMA_API_DEBUG=y -CONFIG_STRING_SELFTEST=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y @@ -853,12 +855,12 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y -CONFIG_TEST_SORT=y CONFIG_KPROBES_SANITY_TEST=y CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y +CONFIG_STRING_SELFTEST=y CONFIG_TEST_BITOPS=m CONFIG_TEST_BPF=m CONFIG_TEST_LIVEPATCH=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 56a1cc85c5d7..f08b161c9446 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_LSM=y +CONFIG_SCHED_CORE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -494,6 +495,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set # CONFIG_NET_VENDOR_MICROSOFT is not set +# CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m @@ -648,7 +650,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_V4_SECURITY_LABEL=y CONFIG_CIFS=m -CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y @@ -708,6 +709,8 @@ CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m CONFIG_CRYPTO_BLAKE2S=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_SHA3=m diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index ae683aa623ac..c5b35ea129cf 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -159,7 +159,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, mmap_read_lock(current->mm); ret = -EINVAL; - vma = find_vma(current->mm, mmio_addr); + vma = vma_lookup(current->mm, mmio_addr); if (!vma) goto out_unlock_mmap; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) @@ -298,7 +298,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, mmap_read_lock(current->mm); ret = -EINVAL; - vma = find_vma(current->mm, mmio_addr); + vma = vma_lookup(current->mm, mmio_addr); if (!vma) goto out_unlock_mmap; if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) diff --git a/arch/sh/boot/Makefile b/arch/sh/boot/Makefile index 58592dfa5cb6..c081e7e2d6e7 100644 --- a/arch/sh/boot/Makefile +++ b/arch/sh/boot/Makefile @@ -80,30 +80,30 @@ $(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE $(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE $(call if_changed,lzo) -$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 +$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE $(call if_changed,uimage,bzip2) -$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz +$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(call if_changed,uimage,gzip) -$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) -$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz +$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE $(call if_changed,uimage,xz) -$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo +$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE $(call if_changed,uimage,lzo) -$(obj)/uImage.bin: $(obj)/vmlinux.bin +$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE $(call if_changed,uimage,none) OBJCOPYFLAGS_vmlinux.srec := -I binary -O srec -$(obj)/vmlinux.srec: $(obj)/compressed/vmlinux +$(obj)/vmlinux.srec: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) OBJCOPYFLAGS_uImage.srec := -I binary -O srec -$(obj)/uImage.srec: $(obj)/uImage +$(obj)/uImage.srec: $(obj)/uImage FORCE $(call if_changed,objcopy) $(obj)/uImage: $(obj)/uImage.$(suffix-y) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 8e1d72a16759..7ceae24b0ca9 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -356,7 +356,9 @@ err_nomem: void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size))) + size = PAGE_ALIGN(size); + + if (!sparc_dma_free_resource(cpu_addr, size)) return; dma_make_coherent(dma_addr, size); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 8e645ddac58e..30f171b7b00c 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -39,6 +39,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ + char data[]; } __attribute__((aligned(16))); struct mdesc_elem { @@ -612,7 +613,7 @@ EXPORT_SYMBOL(mdesc_get_node_info); static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { - return (struct mdesc_elem *) (mdesc + 1); + return (struct mdesc_elem *) mdesc->data; } static void *name_block(struct mdesc_hdr *mdesc) diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694..f3a8cd491ce0 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4e001bbbb425..dad7f85dcdea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -339,6 +339,11 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK config ARCH_HIBERNATION_POSSIBLE def_bool y +config ARCH_NR_GPIO + int + default 1024 if X86_64 + default 512 + config ARCH_SUSPEND_POSSIBLE def_bool y diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index e7355f8b51c2..94834c4b5e5e 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -4,6 +4,12 @@ tune = $(call cc-option,-mtune=$(1),$(2)) +ifdef CONFIG_CC_IS_CLANG +align := -falign-functions=0 $(call cc-option,-falign-jumps=0) $(call cc-option,-falign-loops=0) +else +align := -falign-functions=0 -falign-jumps=0 -falign-loops=0 +endif + cflags-$(CONFIG_M486SX) += -march=i486 cflags-$(CONFIG_M486) += -march=i486 cflags-$(CONFIG_M586) += -march=i586 @@ -19,11 +25,11 @@ cflags-$(CONFIG_MK6) += -march=k6 # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += -march=athlon cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) -cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align) +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align) cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) -falign-functions=0 -falign-jumps=0 -falign-loops=0 +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align) cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 90e682a92820..32a1ad356c18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val) /* * IPI implementation on Hyper-V. */ -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, + bool exclude_self) { struct hv_send_ipi_ex **arg; struct hv_send_ipi_ex *ipi_arg; @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) if (!cpumask_equal(mask, cpu_present_mask)) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + if (exclude_self) + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask); + else + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); } if (nr_bank < 0) goto ipi_mask_ex_done; @@ -138,15 +142,25 @@ ipi_mask_ex_done: return hv_result_success(status); } -static bool __send_ipi_mask(const struct cpumask *mask, int vector) +static bool __send_ipi_mask(const struct cpumask *mask, int vector, + bool exclude_self) { - int cur_cpu, vcpu; + int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; u64 status; + unsigned int weight; trace_hyperv_send_ipi_mask(mask, vector); - if (cpumask_empty(mask)) + weight = cpumask_weight(mask); + + /* + * Do nothing if + * 1. the mask is empty + * 2. the mask only contains self when exclude_self is true + */ + if (weight == 0 || + (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; if (!hv_hypercall_pg) @@ -172,6 +186,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) ipi_arg.cpu_mask = 0; for_each_cpu(cur_cpu, mask) { + if (exclude_self && cur_cpu == this_cpu) + continue; vcpu = hv_cpu_number_to_vp_number(cur_cpu); if (vcpu == VP_INVAL) return false; @@ -191,7 +207,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) return hv_result_success(status); do_ex_hypercall: - return __send_ipi_mask_ex(mask, vector); + return __send_ipi_mask_ex(mask, vector, exclude_self); } static bool __send_ipi_one(int cpu, int vector) @@ -208,7 +224,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; if (vp >= 64) - return __send_ipi_mask_ex(cpumask_of(cpu), vector); + return __send_ipi_mask_ex(cpumask_of(cpu), vector, false); status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp)); return hv_result_success(status); @@ -222,20 +238,13 @@ static void hv_send_ipi(int cpu, int vector) static void hv_send_ipi_mask(const struct cpumask *mask, int vector) { - if (!__send_ipi_mask(mask, vector)) + if (!__send_ipi_mask(mask, vector, false)) orig_apic.send_IPI_mask(mask, vector); } static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned int this_cpu = smp_processor_id(); - struct cpumask new_mask; - const struct cpumask *local_mask; - - cpumask_copy(&new_mask, mask); - cpumask_clear_cpu(this_cpu, &new_mask); - local_mask = &new_mask; - if (!__send_ipi_mask(local_mask, vector)) + if (!__send_ipi_mask(mask, vector, true)) orig_apic.send_IPI_mask_allbutself(mask, vector); } @@ -246,7 +255,7 @@ static void hv_send_ipi_allbutself(int vector) static void hv_send_ipi_all(int vector) { - if (!__send_ipi_mask(cpu_online_mask, vector)) + if (!__send_ipi_mask(cpu_online_mask, vector, false)) orig_apic.send_IPI_all(vector); } diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index c9fa7be3df82..5c95d242f38d 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -301,8 +301,8 @@ do { \ unsigned int __gu_low, __gu_high; \ const unsigned int __user *__gu_ptr; \ __gu_ptr = (const void __user *)(ptr); \ - __get_user_asm(__gu_low, ptr, "l", "=r", label); \ - __get_user_asm(__gu_high, ptr+1, "l", "=r", label); \ + __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \ + __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \ } while (0) #else diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index d66af2950e06..b5e36bd0425b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 8cb7816d03b4..193204aee880 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1253,6 +1253,9 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin static void kill_me_now(struct callback_head *ch) { + struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me); + + p->mce_count = 0; force_sig(SIGBUS); } @@ -1262,6 +1265,7 @@ static void kill_me_maybe(struct callback_head *cb) int flags = MF_ACTION_REQUIRED; int ret; + p->mce_count = 0; pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); if (!p->mce_ripv) @@ -1290,17 +1294,34 @@ static void kill_me_maybe(struct callback_head *cb) } } -static void queue_task_work(struct mce *m, int kill_current_task) +static void queue_task_work(struct mce *m, char *msg, int kill_current_task) { - current->mce_addr = m->addr; - current->mce_kflags = m->kflags; - current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); - current->mce_whole_page = whole_page(m); + int count = ++current->mce_count; - if (kill_current_task) - current->mce_kill_me.func = kill_me_now; - else - current->mce_kill_me.func = kill_me_maybe; + /* First call, save all the details */ + if (count == 1) { + current->mce_addr = m->addr; + current->mce_kflags = m->kflags; + current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); + current->mce_whole_page = whole_page(m); + + if (kill_current_task) + current->mce_kill_me.func = kill_me_now; + else + current->mce_kill_me.func = kill_me_maybe; + } + + /* Ten is likely overkill. Don't expect more than two faults before task_work() */ + if (count > 10) + mce_panic("Too many consecutive machine checks while accessing user data", m, msg); + + /* Second or later call, make sure page address matches the one from first call */ + if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT)) + mce_panic("Consecutive machine checks to different user pages", m, msg); + + /* Do not call task_work_add() more than once */ + if (count > 1) + return; task_work_add(current, ¤t->mce_kill_me, TWA_RESUME); } @@ -1438,7 +1459,7 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - queue_task_work(&m, kill_current_task); + queue_task_work(&m, msg, kill_current_task); } else { /* @@ -1456,7 +1477,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } if (m.kflags & MCE_IN_KERNEL_COPYIN) - queue_task_work(&m, kill_current_task); + queue_task_work(&m, msg, kill_current_task); } out: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 78a32b956e81..5afd98559193 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_free_ptr(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a6e11763763f..36098226a957 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1432,18 +1432,18 @@ int kern_addr_valid(unsigned long addr) return 0; p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) + if (!p4d_present(*p4d)) return 0; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + if (!pud_present(*pud)) return 0; if (pud_large(*pud)) return pfn_valid(pud_pfn(*pud)); pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (!pmd_present(*pmd)) return 0; if (pmd_large(*pmd)) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1a50434c8a4d..ef885370719a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -49,8 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; - else if (p) - memblock_free(__pa(p), PMD_SIZE); + memblock_free_ptr(p, PMD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); @@ -86,8 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; - else if (p) - memblock_free(__pa(p), PUD_SIZE); + memblock_free_ptr(p, PUD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index a1b5c71099e6..1e9b93b088db 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -355,7 +355,7 @@ void __init numa_reset_distance(void) /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) - memblock_free(__pa(numa_distance), size); + memblock_free_ptr(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 737491b13728..e801e30089c4 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -517,8 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) } /* free the copied physical distance table */ - if (phys_dist) - memblock_free(__pa(phys_dist), phys_size); + memblock_free_ptr(phys_dist, phys_size); return; no_emu: diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 3112ca7786ed..4ba2a3ee4bce 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -583,7 +583,12 @@ int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_type, int err = 0; start = sanitize_phys(start); - end = sanitize_phys(end); + + /* + * The end address passed into this function is exclusive, but + * sanitize_phys() expects an inclusive address. + */ + end = sanitize_phys(end - 1) + 1; if (start >= end) { WARN(1, "%s failed: [mem %#010Lx-%#010Lx], req %s\n", __func__, start, end - 1, cattr_name(req_type)); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 753f63734c13..349f780a1567 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1214,6 +1214,11 @@ static void __init xen_dom0_set_legacy_features(void) x86_platform.legacy.rtc = 1; } +static void __init xen_domu_set_legacy_features(void) +{ + x86_platform.legacy.rtc = 0; +} + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1359,6 +1364,8 @@ asmlinkage __visible void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; + x86_platform.set_legacy_features = + xen_domu_set_legacy_features; } else { const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 1df5f01529e5..8d751939c6f3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1518,14 +1518,17 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, if (pinned) { struct page *page = pfn_to_page(pfn); - if (static_branch_likely(&xen_struct_pages_ready)) + pinned = false; + if (static_branch_likely(&xen_struct_pages_ready)) { + pinned = PagePinned(page); SetPagePinned(page); + } xen_mc_batch(); __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); xen_mc_issue(PARAVIRT_LAZY_MMU); diff --git a/block/Makefile b/block/Makefile index 6cf40276d3cf..41aa1ba69c90 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,7 +3,7 @@ # Makefile for the kernel block layer # -obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \ +obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ diff --git a/fs/block_dev.c b/block/bdev.c index 45df6cbccf12..cf2780cb44a7 100644 --- a/fs/block_dev.c +++ b/block/bdev.c @@ -7,12 +7,10 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/fcntl.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/major.h> #include <linux/device_cgroup.h> -#include <linux/highmem.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/module.h> @@ -20,30 +18,22 @@ #include <linux/magic.h> #include <linux/buffer_head.h> #include <linux/swap.h> -#include <linux/pagevec.h> #include <linux/writeback.h> -#include <linux/mpage.h> #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/uio.h> #include <linux/namei.h> -#include <linux/log2.h> #include <linux/cleancache.h> -#include <linux/task_io_accounting_ops.h> -#include <linux/falloc.h> #include <linux/part_stat.h> #include <linux/uaccess.h> -#include <linux/suspend.h> -#include "internal.h" -#include "../block/blk.h" +#include "../fs/internal.h" +#include "blk.h" struct bdev_inode { struct block_device bdev; struct inode vfs_inode; }; -static const struct address_space_operations def_blk_aops; - static inline struct bdev_inode *BDEV_I(struct inode *inode) { return container_of(inode, struct bdev_inode, vfs_inode); @@ -194,332 +184,6 @@ int sb_min_blocksize(struct super_block *sb, int size) EXPORT_SYMBOL(sb_min_blocksize); -static int -blkdev_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) -{ - bh->b_bdev = I_BDEV(inode); - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; -} - -static struct inode *bdev_file_inode(struct file *file) -{ - return file->f_mapping->host; -} - -static unsigned int dio_bio_write_op(struct kiocb *iocb) -{ - unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; - - /* avoid the need for a I/O completion work item */ - if (iocb->ki_flags & IOCB_DSYNC) - op |= REQ_FUA; - return op; -} - -#define DIO_INLINE_BIO_VECS 4 - -static void blkdev_bio_end_io_simple(struct bio *bio) -{ - struct task_struct *waiter = bio->bi_private; - - WRITE_ONCE(bio->bi_private, NULL); - blk_wake_io_task(waiter); -} - -static ssize_t -__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, - unsigned int nr_pages) -{ - struct file *file = iocb->ki_filp; - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; - loff_t pos = iocb->ki_pos; - bool should_dirty = false; - struct bio bio; - ssize_t ret; - blk_qc_t qc; - - if ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)) - return -EINVAL; - - if (nr_pages <= DIO_INLINE_BIO_VECS) - vecs = inline_vecs; - else { - vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec), - GFP_KERNEL); - if (!vecs) - return -ENOMEM; - } - - bio_init(&bio, vecs, nr_pages); - bio_set_dev(&bio, bdev); - bio.bi_iter.bi_sector = pos >> 9; - bio.bi_write_hint = iocb->ki_hint; - bio.bi_private = current; - bio.bi_end_io = blkdev_bio_end_io_simple; - bio.bi_ioprio = iocb->ki_ioprio; - - ret = bio_iov_iter_get_pages(&bio, iter); - if (unlikely(ret)) - goto out; - ret = bio.bi_iter.bi_size; - - if (iov_iter_rw(iter) == READ) { - bio.bi_opf = REQ_OP_READ; - if (iter_is_iovec(iter)) - should_dirty = true; - } else { - bio.bi_opf = dio_bio_write_op(iocb); - task_io_account_write(ret); - } - if (iocb->ki_flags & IOCB_NOWAIT) - bio.bi_opf |= REQ_NOWAIT; - if (iocb->ki_flags & IOCB_HIPRI) - bio_set_polled(&bio, iocb); - - qc = submit_bio(&bio); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!READ_ONCE(bio.bi_private)) - break; - if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, true)) - blk_io_schedule(); - } - __set_current_state(TASK_RUNNING); - - bio_release_pages(&bio, should_dirty); - if (unlikely(bio.bi_status)) - ret = blk_status_to_errno(bio.bi_status); - -out: - if (vecs != inline_vecs) - kfree(vecs); - - bio_uninit(&bio); - - return ret; -} - -struct blkdev_dio { - union { - struct kiocb *iocb; - struct task_struct *waiter; - }; - size_t size; - atomic_t ref; - bool multi_bio : 1; - bool should_dirty : 1; - bool is_sync : 1; - struct bio bio; -}; - -static struct bio_set blkdev_dio_pool; - -static int blkdev_iopoll(struct kiocb *kiocb, bool wait) -{ - struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); - struct request_queue *q = bdev_get_queue(bdev); - - return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); -} - -static void blkdev_bio_end_io(struct bio *bio) -{ - struct blkdev_dio *dio = bio->bi_private; - bool should_dirty = dio->should_dirty; - - if (bio->bi_status && !dio->bio.bi_status) - dio->bio.bi_status = bio->bi_status; - - if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { - if (!dio->is_sync) { - struct kiocb *iocb = dio->iocb; - ssize_t ret; - - if (likely(!dio->bio.bi_status)) { - ret = dio->size; - iocb->ki_pos += ret; - } else { - ret = blk_status_to_errno(dio->bio.bi_status); - } - - dio->iocb->ki_complete(iocb, ret, 0); - if (dio->multi_bio) - bio_put(&dio->bio); - } else { - struct task_struct *waiter = dio->waiter; - - WRITE_ONCE(dio->waiter, NULL); - blk_wake_io_task(waiter); - } - } - - if (should_dirty) { - bio_check_pages_dirty(bio); - } else { - bio_release_pages(bio, false); - bio_put(bio); - } -} - -static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - unsigned int nr_pages) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = bdev_file_inode(file); - struct block_device *bdev = I_BDEV(inode); - struct blk_plug plug; - struct blkdev_dio *dio; - struct bio *bio; - bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; - bool is_read = (iov_iter_rw(iter) == READ), is_sync; - loff_t pos = iocb->ki_pos; - blk_qc_t qc = BLK_QC_T_NONE; - int ret = 0; - - if ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)) - return -EINVAL; - - bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); - - dio = container_of(bio, struct blkdev_dio, bio); - dio->is_sync = is_sync = is_sync_kiocb(iocb); - if (dio->is_sync) { - dio->waiter = current; - bio_get(bio); - } else { - dio->iocb = iocb; - } - - dio->size = 0; - dio->multi_bio = false; - dio->should_dirty = is_read && iter_is_iovec(iter); - - /* - * Don't plug for HIPRI/polled IO, as those should go straight - * to issue - */ - if (!is_poll) - blk_start_plug(&plug); - - for (;;) { - bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector = pos >> 9; - bio->bi_write_hint = iocb->ki_hint; - bio->bi_private = dio; - bio->bi_end_io = blkdev_bio_end_io; - bio->bi_ioprio = iocb->ki_ioprio; - - ret = bio_iov_iter_get_pages(bio, iter); - if (unlikely(ret)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - break; - } - - if (is_read) { - bio->bi_opf = REQ_OP_READ; - if (dio->should_dirty) - bio_set_pages_dirty(bio); - } else { - bio->bi_opf = dio_bio_write_op(iocb); - task_io_account_write(bio->bi_iter.bi_size); - } - if (iocb->ki_flags & IOCB_NOWAIT) - bio->bi_opf |= REQ_NOWAIT; - - dio->size += bio->bi_iter.bi_size; - pos += bio->bi_iter.bi_size; - - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); - if (!nr_pages) { - bool polled = false; - - if (iocb->ki_flags & IOCB_HIPRI) { - bio_set_polled(bio, iocb); - polled = true; - } - - qc = submit_bio(bio); - - if (polled) - WRITE_ONCE(iocb->ki_cookie, qc); - break; - } - - if (!dio->multi_bio) { - /* - * AIO needs an extra reference to ensure the dio - * structure which is embedded into the first bio - * stays around. - */ - if (!is_sync) - bio_get(bio); - dio->multi_bio = true; - atomic_set(&dio->ref, 2); - } else { - atomic_inc(&dio->ref); - } - - submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, nr_pages); - } - - if (!is_poll) - blk_finish_plug(&plug); - - if (!is_sync) - return -EIOCBQUEUED; - - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (!READ_ONCE(dio->waiter)) - break; - - if (!(iocb->ki_flags & IOCB_HIPRI) || - !blk_poll(bdev_get_queue(bdev), qc, true)) - blk_io_schedule(); - } - __set_current_state(TASK_RUNNING); - - if (!ret) - ret = blk_status_to_errno(dio->bio.bi_status); - if (likely(!ret)) - ret = dio->size; - - bio_put(&dio->bio); - return ret; -} - -static ssize_t -blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - unsigned int nr_pages; - - if (!iov_iter_count(iter)) - return 0; - - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) - return __blkdev_direct_IO_simple(iocb, iter, nr_pages); - - return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); -} - -static __init int blkdev_init(void) -{ - return bioset_init(&blkdev_dio_pool, 4, - offsetof(struct blkdev_dio, bio), - BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); -} -module_init(blkdev_init); - int __sync_blockdev(struct block_device *bdev, int wait) { if (!bdev) @@ -637,81 +301,6 @@ out: } EXPORT_SYMBOL(thaw_bdev); -static int blkdev_writepage(struct page *page, struct writeback_control *wbc) -{ - return block_write_full_page(page, blkdev_get_block, wbc); -} - -static int blkdev_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, blkdev_get_block); -} - -static void blkdev_readahead(struct readahead_control *rac) -{ - mpage_readahead(rac, blkdev_get_block); -} - -static int blkdev_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - return block_write_begin(mapping, pos, len, flags, pagep, - blkdev_get_block); -} - -static int blkdev_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - int ret; - ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - unlock_page(page); - put_page(page); - - return ret; -} - -/* - * private llseek: - * for a block special file file_inode(file)->i_size is zero - * so we compute the size by hand (just as in block_read/write above) - */ -static loff_t block_llseek(struct file *file, loff_t offset, int whence) -{ - struct inode *bd_inode = bdev_file_inode(file); - loff_t retval; - - inode_lock(bd_inode); - retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); - inode_unlock(bd_inode); - return retval; -} - -static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, - int datasync) -{ - struct inode *bd_inode = bdev_file_inode(filp); - struct block_device *bdev = I_BDEV(bd_inode); - int error; - - error = file_write_and_wait_range(filp, start, end); - if (error) - return error; - - /* - * There is no need to serialise calls to blkdev_issue_flush with - * i_mutex and doing so causes performance issues with concurrent - * O_SYNC writers to a block device. - */ - error = blkdev_issue_flush(bdev); - if (error == -EOPNOTSUPP) - error = 0; - - return error; -} - /** * bdev_read_page() - Start reading a page from a block device * @bdev: The device to read the page from @@ -1305,35 +894,6 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, } EXPORT_SYMBOL(blkdev_get_by_path); -static int blkdev_open(struct inode * inode, struct file * filp) -{ - struct block_device *bdev; - - /* - * Preserve backwards compatibility and allow large file access - * even if userspace doesn't ask for it explicitly. Some mkfs - * binary needs it. We might want to drop this workaround - * during an unstable branch. - */ - filp->f_flags |= O_LARGEFILE; - - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; - - if (filp->f_flags & O_NDELAY) - filp->f_mode |= FMODE_NDELAY; - if (filp->f_flags & O_EXCL) - filp->f_mode |= FMODE_EXCL; - if ((filp->f_flags & O_ACCMODE) == 3) - filp->f_mode |= FMODE_WRITE_IOCTL; - - bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - filp->f_mapping = bdev->bd_inode->i_mapping; - filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - return 0; -} - void blkdev_put(struct block_device *bdev, fmode_t mode) { struct gendisk *disk = bdev->bd_disk; @@ -1397,203 +957,6 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) } EXPORT_SYMBOL(blkdev_put); -static int blkdev_close(struct inode * inode, struct file * filp) -{ - struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); - blkdev_put(bdev, filp->f_mode); - return 0; -} - -static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) -{ - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - fmode_t mode = file->f_mode; - - /* - * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have - * to updated it before every ioctl. - */ - if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY; - else - mode &= ~FMODE_NDELAY; - - return blkdev_ioctl(bdev, mode, cmd, arg); -} - -/* - * Write data to the block device. Only intended for the block device itself - * and the raw driver which basically is a fake block device. - * - * Does not take i_mutex for the write and thus is not for general purpose - * use. - */ -static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - struct inode *bd_inode = bdev_file_inode(file); - loff_t size = i_size_read(bd_inode); - struct blk_plug plug; - size_t shorted = 0; - ssize_t ret; - - if (bdev_read_only(I_BDEV(bd_inode))) - return -EPERM; - - if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) - return -ETXTBSY; - - if (!iov_iter_count(from)) - return 0; - - if (iocb->ki_pos >= size) - return -ENOSPC; - - if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) - return -EOPNOTSUPP; - - size -= iocb->ki_pos; - if (iov_iter_count(from) > size) { - shorted = iov_iter_count(from) - size; - iov_iter_truncate(from, size); - } - - blk_start_plug(&plug); - ret = __generic_file_write_iter(iocb, from); - if (ret > 0) - ret = generic_write_sync(iocb, ret); - iov_iter_reexpand(from, iov_iter_count(from) + shorted); - blk_finish_plug(&plug); - return ret; -} - -static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) -{ - struct file *file = iocb->ki_filp; - struct inode *bd_inode = bdev_file_inode(file); - loff_t size = i_size_read(bd_inode); - loff_t pos = iocb->ki_pos; - size_t shorted = 0; - ssize_t ret; - - if (pos >= size) - return 0; - - size -= pos; - if (iov_iter_count(to) > size) { - shorted = iov_iter_count(to) - size; - iov_iter_truncate(to, size); - } - - ret = generic_file_read_iter(iocb, to); - iov_iter_reexpand(to, iov_iter_count(to) + shorted); - return ret; -} - -static int blkdev_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - return generic_writepages(mapping, wbc); -} - -static const struct address_space_operations def_blk_aops = { - .set_page_dirty = __set_page_dirty_buffers, - .readpage = blkdev_readpage, - .readahead = blkdev_readahead, - .writepage = blkdev_writepage, - .write_begin = blkdev_write_begin, - .write_end = blkdev_write_end, - .writepages = blkdev_writepages, - .direct_IO = blkdev_direct_IO, - .migratepage = buffer_migrate_page_norefs, - .is_dirty_writeback = buffer_check_dirty_writeback, -}; - -#define BLKDEV_FALLOC_FL_SUPPORTED \ - (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) - -static long blkdev_fallocate(struct file *file, int mode, loff_t start, - loff_t len) -{ - struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - loff_t end = start + len - 1; - loff_t isize; - int error; - - /* Fail if we don't recognize the flags. */ - if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) - return -EOPNOTSUPP; - - /* Don't go off the end of the device. */ - isize = i_size_read(bdev->bd_inode); - if (start >= isize) - return -EINVAL; - if (end >= isize) { - if (mode & FALLOC_FL_KEEP_SIZE) { - len = isize - start; - end = start + len - 1; - } else - return -EINVAL; - } - - /* - * Don't allow IO that isn't aligned to logical block size. - */ - if ((start | len) & (bdev_logical_block_size(bdev) - 1)) - return -EINVAL; - - /* Invalidate the page cache, including dirty pages. */ - error = truncate_bdev_range(bdev, file->f_mode, start, end); - if (error) - return error; - - switch (mode) { - case FALLOC_FL_ZERO_RANGE: - case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: - error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, - GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); - break; - case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: - error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, - GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); - break; - case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: - error = blkdev_issue_discard(bdev, start >> 9, len >> 9, - GFP_KERNEL, 0); - break; - default: - return -EOPNOTSUPP; - } - if (error) - return error; - - /* - * Invalidate the page cache again; if someone wandered in and dirtied - * a page, we just discard it - userspace has no way of knowing whether - * the write happened before or after discard completing... - */ - return truncate_bdev_range(bdev, file->f_mode, start, end); -} - -const struct file_operations def_blk_fops = { - .open = blkdev_open, - .release = blkdev_close, - .llseek = block_llseek, - .read_iter = blkdev_read_iter, - .write_iter = blkdev_write_iter, - .iopoll = blkdev_iopoll, - .mmap = generic_file_mmap, - .fsync = blkdev_fsync, - .unlocked_ioctl = block_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = compat_blkdev_ioctl, -#endif - .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, - .fallocate = blkdev_fallocate, -}; - /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 3c88a79a319b..38b9f7684952 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1182,10 +1182,6 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); - ret = blk_iolatency_init(q); - if (ret) - goto err_destroy_all; - ret = blk_ioprio_init(q); if (ret) goto err_destroy_all; @@ -1194,6 +1190,12 @@ int blkcg_init_queue(struct request_queue *q) if (ret) goto err_destroy_all; + ret = blk_iolatency_init(q); + if (ret) { + blk_throtl_exit(q); + goto err_destroy_all; + } + return 0; err_destroy_all: @@ -1364,10 +1366,14 @@ enomem: /* alloc failed, nothing's initialized yet, free everything */ spin_lock_irq(&q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { + struct blkcg *blkcg = blkg->blkcg; + + spin_lock(&blkcg->lock); if (blkg->pd[pol->plid]) { pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } + spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); ret = -ENOMEM; @@ -1399,12 +1405,16 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); list_for_each_entry(blkg, &q->blkg_list, q_node) { + struct blkcg *blkcg = blkg->blkcg; + + spin_lock(&blkcg->lock); if (blkg->pd[pol->plid]) { if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[pol->plid]); pol->pd_free_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid] = NULL; } + spin_unlock(&blkcg->lock); } spin_unlock_irq(&q->queue_lock); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 69a12177dfb6..16d5d5338392 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -426,8 +426,15 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { + struct blk_integrity *bi = &disk->queue->integrity; + + if (!bi->profile) + return; + + /* ensure all bios are off the integrity workqueue */ + blk_flush_integrity(); blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); - memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); + memset(bi, 0, sizeof(*bi)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 86f87346232a..ff5caeb82542 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -208,7 +208,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags, spin_lock_irqsave(&tags->lock, flags); rq = tags->rqs[bitnr]; - if (!rq || !refcount_inc_not_zero(&rq->ref)) + if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref)) rq = NULL; spin_unlock_irqrestore(&tags->lock, flags); return rq; diff --git a/block/blk-mq.c b/block/blk-mq.c index 65d3a63aecc6..108a352051be 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2135,6 +2135,18 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } } +/* + * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple + * queues. This is important for md arrays to benefit from merging + * requests. + */ +static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) +{ + if (plug->multiple_queues) + return BLK_MAX_REQUEST_COUNT * 4; + return BLK_MAX_REQUEST_COUNT; +} + /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2231,7 +2243,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) else last = list_entry_rq(plug->mq_list.prev); - if (request_count >= BLK_MAX_REQUEST_COUNT || (last && + if (request_count >= blk_plug_max_rq_count(plug) || (last && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_flush_plug_list(plug, false); trace_block_plug(q); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 55c49015e533..7c4e7993ba97 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2458,6 +2458,7 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); + del_timer_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); blkcg_deactivate_policy(q, &blkcg_policy_throtl); free_percpu(q->td->latency_buckets[READ]); diff --git a/block/blk.h b/block/blk.h index 8c96b0c90c48..7d2a0ba7ed21 100644 --- a/block/blk.h +++ b/block/blk.h @@ -373,4 +373,6 @@ static inline void bio_clear_hipri(struct bio *bio) bio->bi_opf &= ~REQ_HIPRI; } +extern const struct address_space_operations def_blk_aops; + #endif /* BLK_INTERNAL_H */ diff --git a/block/fops.c b/block/fops.c new file mode 100644 index 000000000000..ffce6f6c68dd --- /dev/null +++ b/block/fops.c @@ -0,0 +1,640 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE + * Copyright (C) 2016 - 2020 Christoph Hellwig + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/blkdev.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/uio.h> +#include <linux/namei.h> +#include <linux/task_io_accounting_ops.h> +#include <linux/falloc.h> +#include <linux/suspend.h> +#include "blk.h" + +static struct inode *bdev_file_inode(struct file *file) +{ + return file->f_mapping->host; +} + +static int blkdev_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + bh->b_bdev = I_BDEV(inode); + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + +static unsigned int dio_bio_write_op(struct kiocb *iocb) +{ + unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; + + /* avoid the need for a I/O completion work item */ + if (iocb->ki_flags & IOCB_DSYNC) + op |= REQ_FUA; + return op; +} + +#define DIO_INLINE_BIO_VECS 4 + +static void blkdev_bio_end_io_simple(struct bio *bio) +{ + struct task_struct *waiter = bio->bi_private; + + WRITE_ONCE(bio->bi_private, NULL); + blk_wake_io_task(waiter); +} + +static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + struct iov_iter *iter, unsigned int nr_pages) +{ + struct file *file = iocb->ki_filp; + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; + loff_t pos = iocb->ki_pos; + bool should_dirty = false; + struct bio bio; + ssize_t ret; + blk_qc_t qc; + + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + if (nr_pages <= DIO_INLINE_BIO_VECS) + vecs = inline_vecs; + else { + vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec), + GFP_KERNEL); + if (!vecs) + return -ENOMEM; + } + + bio_init(&bio, vecs, nr_pages); + bio_set_dev(&bio, bdev); + bio.bi_iter.bi_sector = pos >> 9; + bio.bi_write_hint = iocb->ki_hint; + bio.bi_private = current; + bio.bi_end_io = blkdev_bio_end_io_simple; + bio.bi_ioprio = iocb->ki_ioprio; + + ret = bio_iov_iter_get_pages(&bio, iter); + if (unlikely(ret)) + goto out; + ret = bio.bi_iter.bi_size; + + if (iov_iter_rw(iter) == READ) { + bio.bi_opf = REQ_OP_READ; + if (iter_is_iovec(iter)) + should_dirty = true; + } else { + bio.bi_opf = dio_bio_write_op(iocb); + task_io_account_write(ret); + } + if (iocb->ki_flags & IOCB_NOWAIT) + bio.bi_opf |= REQ_NOWAIT; + if (iocb->ki_flags & IOCB_HIPRI) + bio_set_polled(&bio, iocb); + + qc = submit_bio(&bio); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio.bi_private)) + break; + if (!(iocb->ki_flags & IOCB_HIPRI) || + !blk_poll(bdev_get_queue(bdev), qc, true)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); + + bio_release_pages(&bio, should_dirty); + if (unlikely(bio.bi_status)) + ret = blk_status_to_errno(bio.bi_status); + +out: + if (vecs != inline_vecs) + kfree(vecs); + + bio_uninit(&bio); + + return ret; +} + +struct blkdev_dio { + union { + struct kiocb *iocb; + struct task_struct *waiter; + }; + size_t size; + atomic_t ref; + bool multi_bio : 1; + bool should_dirty : 1; + bool is_sync : 1; + struct bio bio; +}; + +static struct bio_set blkdev_dio_pool; + +static int blkdev_iopoll(struct kiocb *kiocb, bool wait) +{ + struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); + struct request_queue *q = bdev_get_queue(bdev); + + return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); +} + +static void blkdev_bio_end_io(struct bio *bio) +{ + struct blkdev_dio *dio = bio->bi_private; + bool should_dirty = dio->should_dirty; + + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; + + if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { + if (!dio->is_sync) { + struct kiocb *iocb = dio->iocb; + ssize_t ret; + + if (likely(!dio->bio.bi_status)) { + ret = dio->size; + iocb->ki_pos += ret; + } else { + ret = blk_status_to_errno(dio->bio.bi_status); + } + + dio->iocb->ki_complete(iocb, ret, 0); + if (dio->multi_bio) + bio_put(&dio->bio); + } else { + struct task_struct *waiter = dio->waiter; + + WRITE_ONCE(dio->waiter, NULL); + blk_wake_io_task(waiter); + } + } + + if (should_dirty) { + bio_check_pages_dirty(bio); + } else { + bio_release_pages(bio, false); + bio_put(bio); + } +} + +static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + unsigned int nr_pages) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = bdev_file_inode(file); + struct block_device *bdev = I_BDEV(inode); + struct blk_plug plug; + struct blkdev_dio *dio; + struct bio *bio; + bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; + bool is_read = (iov_iter_rw(iter) == READ), is_sync; + loff_t pos = iocb->ki_pos; + blk_qc_t qc = BLK_QC_T_NONE; + int ret = 0; + + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + + dio = container_of(bio, struct blkdev_dio, bio); + dio->is_sync = is_sync = is_sync_kiocb(iocb); + if (dio->is_sync) { + dio->waiter = current; + bio_get(bio); + } else { + dio->iocb = iocb; + } + + dio->size = 0; + dio->multi_bio = false; + dio->should_dirty = is_read && iter_is_iovec(iter); + + /* + * Don't plug for HIPRI/polled IO, as those should go straight + * to issue + */ + if (!is_poll) + blk_start_plug(&plug); + + for (;;) { + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = pos >> 9; + bio->bi_write_hint = iocb->ki_hint; + bio->bi_private = dio; + bio->bi_end_io = blkdev_bio_end_io; + bio->bi_ioprio = iocb->ki_ioprio; + + ret = bio_iov_iter_get_pages(bio, iter); + if (unlikely(ret)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + break; + } + + if (is_read) { + bio->bi_opf = REQ_OP_READ; + if (dio->should_dirty) + bio_set_pages_dirty(bio); + } else { + bio->bi_opf = dio_bio_write_op(iocb); + task_io_account_write(bio->bi_iter.bi_size); + } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; + + dio->size += bio->bi_iter.bi_size; + pos += bio->bi_iter.bi_size; + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); + if (!nr_pages) { + bool polled = false; + + if (iocb->ki_flags & IOCB_HIPRI) { + bio_set_polled(bio, iocb); + polled = true; + } + + qc = submit_bio(bio); + + if (polled) + WRITE_ONCE(iocb->ki_cookie, qc); + break; + } + + if (!dio->multi_bio) { + /* + * AIO needs an extra reference to ensure the dio + * structure which is embedded into the first bio + * stays around. + */ + if (!is_sync) + bio_get(bio); + dio->multi_bio = true; + atomic_set(&dio->ref, 2); + } else { + atomic_inc(&dio->ref); + } + + submit_bio(bio); + bio = bio_alloc(GFP_KERNEL, nr_pages); + } + + if (!is_poll) + blk_finish_plug(&plug); + + if (!is_sync) + return -EIOCBQUEUED; + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(dio->waiter)) + break; + + if (!(iocb->ki_flags & IOCB_HIPRI) || + !blk_poll(bdev_get_queue(bdev), qc, true)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); + + if (!ret) + ret = blk_status_to_errno(dio->bio.bi_status); + if (likely(!ret)) + ret = dio->size; + + bio_put(&dio->bio); + return ret; +} + +static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + unsigned int nr_pages; + + if (!iov_iter_count(iter)) + return 0; + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); + if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) + return __blkdev_direct_IO_simple(iocb, iter, nr_pages); + + return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); +} + +static int blkdev_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, blkdev_get_block, wbc); +} + +static int blkdev_readpage(struct file * file, struct page * page) +{ + return block_read_full_page(page, blkdev_get_block); +} + +static void blkdev_readahead(struct readahead_control *rac) +{ + mpage_readahead(rac, blkdev_get_block); +} + +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + return block_write_begin(mapping, pos, len, flags, pagep, + blkdev_get_block); +} + +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, struct page *page, + void *fsdata) +{ + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + put_page(page); + + return ret; +} + +static int blkdev_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return generic_writepages(mapping, wbc); +} + +const struct address_space_operations def_blk_aops = { + .set_page_dirty = __set_page_dirty_buffers, + .readpage = blkdev_readpage, + .readahead = blkdev_readahead, + .writepage = blkdev_writepage, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, + .writepages = blkdev_writepages, + .direct_IO = blkdev_direct_IO, + .migratepage = buffer_migrate_page_norefs, + .is_dirty_writeback = buffer_check_dirty_writeback, +}; + +/* + * for a block special file file_inode(file)->i_size is zero + * so we compute the size by hand (just as in block_read/write above) + */ +static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *bd_inode = bdev_file_inode(file); + loff_t retval; + + inode_lock(bd_inode); + retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); + inode_unlock(bd_inode); + return retval; +} + +static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) +{ + struct inode *bd_inode = bdev_file_inode(filp); + struct block_device *bdev = I_BDEV(bd_inode); + int error; + + error = file_write_and_wait_range(filp, start, end); + if (error) + return error; + + /* + * There is no need to serialise calls to blkdev_issue_flush with + * i_mutex and doing so causes performance issues with concurrent + * O_SYNC writers to a block device. + */ + error = blkdev_issue_flush(bdev); + if (error == -EOPNOTSUPP) + error = 0; + + return error; +} + +static int blkdev_open(struct inode *inode, struct file *filp) +{ + struct block_device *bdev; + + /* + * Preserve backwards compatibility and allow large file access + * even if userspace doesn't ask for it explicitly. Some mkfs + * binary needs it. We might want to drop this workaround + * during an unstable branch. + */ + filp->f_flags |= O_LARGEFILE; + filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + + if (filp->f_flags & O_NDELAY) + filp->f_mode |= FMODE_NDELAY; + if (filp->f_flags & O_EXCL) + filp->f_mode |= FMODE_EXCL; + if ((filp->f_flags & O_ACCMODE) == 3) + filp->f_mode |= FMODE_WRITE_IOCTL; + + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + filp->f_mapping = bdev->bd_inode->i_mapping; + filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); + return 0; +} + +static int blkdev_close(struct inode *inode, struct file *filp) +{ + struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); + + blkdev_put(bdev, filp->f_mode); + return 0; +} + +static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + + return blkdev_ioctl(bdev, mode, cmd, arg); +} + +/* + * Write data to the block device. Only intended for the block device itself + * and the raw driver which basically is a fake block device. + * + * Does not take i_mutex for the write and thus is not for general purpose + * use. + */ +static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *bd_inode = bdev_file_inode(file); + loff_t size = i_size_read(bd_inode); + struct blk_plug plug; + size_t shorted = 0; + ssize_t ret; + + if (bdev_read_only(I_BDEV(bd_inode))) + return -EPERM; + + if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) + return -ETXTBSY; + + if (!iov_iter_count(from)) + return 0; + + if (iocb->ki_pos >= size) + return -ENOSPC; + + if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) + return -EOPNOTSUPP; + + size -= iocb->ki_pos; + if (iov_iter_count(from) > size) { + shorted = iov_iter_count(from) - size; + iov_iter_truncate(from, size); + } + + blk_start_plug(&plug); + ret = __generic_file_write_iter(iocb, from); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + iov_iter_reexpand(from, iov_iter_count(from) + shorted); + blk_finish_plug(&plug); + return ret; +} + +static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *bd_inode = bdev_file_inode(file); + loff_t size = i_size_read(bd_inode); + loff_t pos = iocb->ki_pos; + size_t shorted = 0; + ssize_t ret; + + if (pos >= size) + return 0; + + size -= pos; + if (iov_iter_count(to) > size) { + shorted = iov_iter_count(to) - size; + iov_iter_truncate(to, size); + } + + ret = generic_file_read_iter(iocb, to); + iov_iter_reexpand(to, iov_iter_count(to) + shorted); + return ret; +} + +#define BLKDEV_FALLOC_FL_SUPPORTED \ + (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) + +static long blkdev_fallocate(struct file *file, int mode, loff_t start, + loff_t len) +{ + struct block_device *bdev = I_BDEV(bdev_file_inode(file)); + loff_t end = start + len - 1; + loff_t isize; + int error; + + /* Fail if we don't recognize the flags. */ + if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) + return -EOPNOTSUPP; + + /* Don't go off the end of the device. */ + isize = i_size_read(bdev->bd_inode); + if (start >= isize) + return -EINVAL; + if (end >= isize) { + if (mode & FALLOC_FL_KEEP_SIZE) { + len = isize - start; + end = start + len - 1; + } else + return -EINVAL; + } + + /* + * Don't allow IO that isn't aligned to logical block size. + */ + if ((start | len) & (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + /* Invalidate the page cache, including dirty pages. */ + error = truncate_bdev_range(bdev, file->f_mode, start, end); + if (error) + return error; + + switch (mode) { + case FALLOC_FL_ZERO_RANGE: + case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, + GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: + error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, + GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: + error = blkdev_issue_discard(bdev, start >> 9, len >> 9, + GFP_KERNEL, 0); + break; + default: + return -EOPNOTSUPP; + } + if (error) + return error; + + /* + * Invalidate the page cache again; if someone wandered in and dirtied + * a page, we just discard it - userspace has no way of knowing whether + * the write happened before or after discard completing... + */ + return truncate_bdev_range(bdev, file->f_mode, start, end); +} + +const struct file_operations def_blk_fops = { + .open = blkdev_open, + .release = blkdev_close, + .llseek = blkdev_llseek, + .read_iter = blkdev_read_iter, + .write_iter = blkdev_write_iter, + .iopoll = blkdev_iopoll, + .mmap = generic_file_mmap, + .fsync = blkdev_fsync, + .unlocked_ioctl = block_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = compat_blkdev_ioctl, +#endif + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, + .fallocate = blkdev_fallocate, +}; + +static __init int blkdev_init(void) +{ + return bioset_init(&blkdev_dio_pool, 4, + offsetof(struct blkdev_dio, bio), + BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); +} +module_init(blkdev_init); diff --git a/block/genhd.c b/block/genhd.c index 567549a011d1..7b6e5e1cf956 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -183,6 +183,7 @@ static struct blk_major_name { void (*probe)(dev_t devt); } *major_names[BLKDEV_MAJOR_HASH_SIZE]; static DEFINE_MUTEX(major_names_lock); +static DEFINE_SPINLOCK(major_names_spinlock); /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(unsigned major) @@ -195,11 +196,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; - mutex_lock(&major_names_lock); + spin_lock(&major_names_spinlock); for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); - mutex_unlock(&major_names_lock); + spin_unlock(&major_names_spinlock); } #endif /* CONFIG_PROC_FS */ @@ -271,6 +272,7 @@ int __register_blkdev(unsigned int major, const char *name, p->next = NULL; index = major_to_index(major); + spin_lock(&major_names_spinlock); for (n = &major_names[index]; *n; n = &(*n)->next) { if ((*n)->major == major) break; @@ -279,6 +281,7 @@ int __register_blkdev(unsigned int major, const char *name, *n = p; else ret = -EBUSY; + spin_unlock(&major_names_spinlock); if (ret < 0) { printk("register_blkdev: cannot get major %u for %s\n", @@ -298,6 +301,7 @@ void unregister_blkdev(unsigned int major, const char *name) int index = major_to_index(major); mutex_lock(&major_names_lock); + spin_lock(&major_names_spinlock); for (n = &major_names[index]; *n; n = &(*n)->next) if ((*n)->major == major) break; @@ -307,6 +311,7 @@ void unregister_blkdev(unsigned int major, const char *name) p = *n; *n = p->next; } + spin_unlock(&major_names_spinlock); mutex_unlock(&major_names_lock); kfree(p); } diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a4d4eebba1da..bd482108310c 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) return ret_val; } -/** - * cppc_get_desired_perf - Get the value of desired performance register. - * @cpunum: CPU from which to get desired performance. - * @desired_perf: address of a variable to store the returned desired performance - * - * Return: 0 for success, -EIO otherwise. - */ -int cppc_get_desired_perf(int cpunum, u64 *desired_perf) +static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); - int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); - struct cpc_register_resource *desired_reg; - struct cppc_pcc_data *pcc_ss_data = NULL; - - desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx]; - if (CPC_IN_PCC(desired_reg)) { + if (CPC_IN_PCC(reg)) { + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); + struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0; if (pcc_ss_id < 0) @@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) down_write(&pcc_ss_data->pcc_lock); if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0) - cpc_read(cpunum, desired_reg, desired_perf); + cpc_read(cpunum, reg, perf); else ret = -EIO; @@ -1044,13 +1035,37 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf) return ret; } - cpc_read(cpunum, desired_reg, desired_perf); + cpc_read(cpunum, reg, perf); return 0; } + +/** + * cppc_get_desired_perf - Get the desired performance register value. + * @cpunum: CPU from which to get desired performance. + * @desired_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_desired_perf(int cpunum, u64 *desired_perf) +{ + return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf); +} EXPORT_SYMBOL_GPL(cppc_get_desired_perf); /** + * cppc_get_nominal_perf - Get the nominal performance register value. + * @cpunum: CPU from which to get nominal performance. + * @nominal_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) +{ + return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); +} + +/** * cppc_get_perf_caps - Get a CPU's performance capabilities. * @cpunum: CPU from which to get capabilities info. * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 1f6007abcf18..89c22bc55057 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -288,10 +288,18 @@ invalid_guid: void __init init_prmt(void) { + struct acpi_table_header *tbl; acpi_status status; - int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) + + int mc; + + status = acpi_get_table(ACPI_SIG_PRMT, 0, &tbl); + if (ACPI_FAILURE(status)) + return; + + mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) + sizeof (struct acpi_table_prmt_header), 0, acpi_parse_prmt, 0); + acpi_put_table(tbl); /* * Return immediately if PRMT table is not present or no PRM module found. */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b24513ec3fae..5b54c80b9d32 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -16,7 +16,6 @@ #include <linux/signal.h> #include <linux/kthread.h> #include <linux/dmi.h> -#include <linux/nls.h> #include <linux/dma-map-ops.h> #include <linux/platform_data/x86/apple.h> #include <linux/pgtable.h> diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c index fd430e6866a1..6526aa51fb1d 100644 --- a/drivers/auxdisplay/cfag12864b.c +++ b/drivers/auxdisplay/cfag12864b.c @@ -33,7 +33,7 @@ */ static unsigned int cfag12864b_rate = CONFIG_CFAG12864B_RATE; -module_param(cfag12864b_rate, uint, S_IRUGO); +module_param(cfag12864b_rate, uint, 0444); MODULE_PARM_DESC(cfag12864b_rate, "Refresh rate (hertz)"); diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 24fd6f369ebe..304accde365c 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -637,9 +637,7 @@ static int panel_notify_sys(struct notifier_block *this, unsigned long code, } static struct notifier_block panel_notifier = { - panel_notify_sys, - NULL, - 0 + .notifier_call = panel_notify_sys, }; int charlcd_register(struct charlcd *lcd) diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 2e5e7c993933..8b2a0eb3f32a 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -323,8 +323,8 @@ static int hd44780_remove(struct platform_device *pdev) { struct charlcd *lcd = platform_get_drvdata(pdev); - kfree(lcd->drvdata); charlcd_unregister(lcd); + kfree(lcd->drvdata); kfree(lcd); return 0; diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c index 03c95ad4216c..e871b94a1911 100644 --- a/drivers/auxdisplay/ks0108.c +++ b/drivers/auxdisplay/ks0108.c @@ -28,11 +28,11 @@ */ static unsigned int ks0108_port = CONFIG_KS0108_PORT; -module_param(ks0108_port, uint, S_IRUGO); +module_param(ks0108_port, uint, 0444); MODULE_PARM_DESC(ks0108_port, "Parallel port where the LCD is connected"); static unsigned int ks0108_delay = CONFIG_KS0108_DELAY; -module_param(ks0108_delay, uint, S_IRUGO); +module_param(ks0108_delay, uint, 0444); MODULE_PARM_DESC(ks0108_delay, "Delay between each control writing (microseconds)"); /* @@ -167,19 +167,7 @@ static struct parport_driver ks0108_parport_driver = { .detach = ks0108_parport_detach, .devmodel = true, }; - -static int __init ks0108_init(void) -{ - return parport_register_driver(&ks0108_parport_driver); -} - -static void __exit ks0108_exit(void) -{ - parport_unregister_driver(&ks0108_parport_driver); -} - -module_init(ks0108_init); -module_exit(ks0108_exit); +module_parport_driver(ks0108_parport_driver); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>"); diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 46c503486e96..00fb4120a5b3 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -264,7 +264,7 @@ void __init numa_free_distance(void) size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); - memblock_free(__pa(numa_distance), size); + memblock_free_ptr(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d568772152c2..cbea78e79f3d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) } dev->power.may_skip_resume = true; - dev->power.must_resume = false; + dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME); dpm_watchdog_set(&wd, dev); device_lock(dev); diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index a97f33d0c59f..94665037f4a3 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -13,6 +13,7 @@ #include <linux/export.h> #include <linux/rtc.h> #include <linux/suspend.h> +#include <linux/init.h> #include <linux/mc146818rtc.h> @@ -165,6 +166,9 @@ void generate_pm_trace(const void *tracedata, unsigned int user) const char *file = *(const char **)(tracedata + 2); unsigned int user_hash_value, file_hash_value; + if (!x86_platform.legacy.rtc) + return; + user_hash_value = user % USERHASH; file_hash_value = hash_string(lineno, file, FILEHASH); set_magic_time(user_hash_value, file_hash_value, dev_hash_value); @@ -267,6 +271,9 @@ static struct notifier_block pm_trace_nb = { static int __init early_resume_init(void) { + if (!x86_platform.legacy.rtc) + return 0; + hash_value_early_read = read_magic_time(); register_pm_notifier(&pm_trace_nb); return 0; @@ -277,6 +284,9 @@ static int __init late_resume_init(void) unsigned int val = hash_value_early_read; unsigned int user, file, dev; + if (!x86_platform.legacy.rtc) + return 0; + user = val % USERHASH; val = val / USERHASH; file = val % FILEHASH; diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 3bad3266a2ad..b91a3a9bf9f6 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -12,14 +12,11 @@ /** * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ * @dev: Device entry - * @irq: Device wake-up capable interrupt * @wirq: Wake irq specific data * - * Internal function to attach either a device IO interrupt or a - * dedicated wake-up interrupt as a wake IRQ. + * Internal function to attach a dedicated wake-up interrupt as a wake IRQ. */ -static int dev_pm_attach_wake_irq(struct device *dev, int irq, - struct wake_irq *wirq) +static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq) { unsigned long flags; @@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq) wirq->dev = dev; wirq->irq = irq; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) kfree(wirq); @@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq) if (err) goto err_free_name; - err = dev_pm_attach_wake_irq(dev, irq, wirq); + err = dev_pm_attach_wake_irq(dev, wirq); if (err) goto err_free_irq; diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index c84be0028f63..26798da661bd 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -129,8 +129,8 @@ static int __init n64cart_probe(struct platform_device *pdev) } reg_base = devm_platform_ioremap_resource(pdev, 0); - if (!reg_base) - return -EINVAL; + if (IS_ERR(reg_base)) + return PTR_ERR(reg_base); disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 57c6ae7debd9..9b3bd083b411 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -762,7 +762,7 @@ static int virtblk_probe(struct virtio_device *vdev) goto out_free_vblk; /* Default queue sizing is to fill the ring. */ - if (likely(!virtblk_queue_depth)) { + if (!virtblk_queue_depth) { queue_depth = vblk->vqs[0].vq->num_free; /* ... but without indirect descs, we use 2 descs per req */ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) @@ -836,7 +836,7 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_size = queue_logical_block_size(q); - if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) { + if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) { dev_err(&vdev->dev, "block size is changed unexpectedly, now is %u\n", blk_size); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index bb466981dc1b..6f3272b58ced 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -591,7 +591,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->handlers->get_result(smi_info->si_sm, msg, 3); if (msg[2] != 0) { /* Error clearing flags */ - dev_warn(smi_info->io.dev, + dev_warn_ratelimited(smi_info->io.dev, "Error clearing flags: %2.2x\n", msg[2]); } smi_info->si_state = SI_NORMAL; @@ -683,10 +683,10 @@ static void handle_transaction_done(struct smi_info *smi_info) /* We got the flags from the SMI, now handle them. */ smi_info->handlers->get_result(smi_info->si_sm, msg, 4); if (msg[2] != 0) { - dev_warn(smi_info->io.dev, - "Couldn't get irq info: %x.\n", msg[2]); - dev_warn(smi_info->io.dev, - "Maybe ok, but ipmi might run very slowly.\n"); + dev_warn_ratelimited(smi_info->io.dev, + "Couldn't get irq info: %x,\n" + "Maybe ok, but ipmi might run very slowly.\n", + msg[2]); smi_info->si_state = SI_NORMAL; break; } @@ -721,7 +721,7 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->handlers->get_result(smi_info->si_sm, msg, 4); if (msg[2] != 0) - dev_warn(smi_info->io.dev, + dev_warn_ratelimited(smi_info->io.dev, "Could not set the global enables: 0x%x.\n", msg[2]); @@ -1343,7 +1343,7 @@ retry: if (cc != IPMI_CC_NO_ERROR && ++retry_count <= GET_DEVICE_ID_MAX_RETRY) { - dev_warn(smi_info->io.dev, + dev_warn_ratelimited(smi_info->io.dev, "BMC returned 0x%2.2x, retry get bmc device id\n", cc); goto retry; @@ -1605,7 +1605,7 @@ static ssize_t name##_show(struct device *dev, \ \ return snprintf(buf, 10, "%u\n", smi_get_stat(smi_info, name)); \ } \ -static DEVICE_ATTR(name, 0444, name##_show, NULL) +static DEVICE_ATTR_RO(name) static ssize_t type_show(struct device *dev, struct device_attribute *attr, @@ -1615,7 +1615,7 @@ static ssize_t type_show(struct device *dev, return snprintf(buf, 10, "%s\n", si_to_str[smi_info->io.si_type]); } -static DEVICE_ATTR(type, 0444, type_show, NULL); +static DEVICE_ATTR_RO(type); static ssize_t interrupts_enabled_show(struct device *dev, struct device_attribute *attr, @@ -1626,8 +1626,7 @@ static ssize_t interrupts_enabled_show(struct device *dev, return snprintf(buf, 10, "%d\n", enabled); } -static DEVICE_ATTR(interrupts_enabled, 0444, - interrupts_enabled_show, NULL); +static DEVICE_ATTR_RO(interrupts_enabled); IPMI_SI_ATTR(short_timeouts); IPMI_SI_ATTR(long_timeouts); @@ -1658,7 +1657,7 @@ static ssize_t params_show(struct device *dev, smi_info->io.irq, smi_info->io.slave_addr); } -static DEVICE_ATTR(params, 0444, params_show, NULL); +static DEVICE_ATTR_RO(params); static struct attribute *ipmi_si_dev_attrs[] = { &dev_attr_type.attr, diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index 053089f83677..3236706771b1 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -176,10 +176,6 @@ static const struct parent_map gcc_parent_map_2[] = { { P_GPLL0_OUT_ODD, 2 }, }; -static const struct clk_parent_data gcc_parent_data_2[] = { - { .fw_name = "bi_tcxo" }, - { .hw = &gpll0_out_odd.clkr.hw }, -}; static const struct clk_parent_data gcc_parent_data_2_ao[] = { { .fw_name = "bi_tcxo_ao" }, { .hw = &gpll0_out_odd.clkr.hw }, diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index 66b05a326910..a6f365b9cc1a 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -74,8 +74,8 @@ unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *l if (count) return count; - kobject_put(&attr_set->kobj); mutex_destroy(&attr_set->update_lock); + kobject_put(&attr_set->kobj); return 0; } EXPORT_SYMBOL_GPL(gov_attr_set_put); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2d83a9f9651b..8c176b7dae41 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data; * @get_min: Callback to get minimum P state * @get_turbo: Callback to get turbo P state * @get_scaling: Callback to get frequency scaling factor + * @get_cpu_scaling: Get frequency scaling factor for a given cpu * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference * @get_val: Callback to convert P state to actual MSR write value * @get_vid: Callback to get VID data for Atom platforms @@ -281,6 +282,7 @@ struct pstate_funcs { int (*get_min)(void); int (*get_turbo)(void); int (*get_scaling)(void); + int (*get_cpu_scaling)(int cpu); int (*get_aperf_mperf_shift)(void); u64 (*get_val)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); @@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu) return cppc_perf.nominal_perf; } +static u32 intel_pstate_cppc_nominal(int cpu) +{ + u64 nominal_perf; + + if (cppc_get_nominal_perf(cpu, &nominal_perf)) + return 0; + + return nominal_perf; +} #else /* CONFIG_ACPI_CPPC_LIB */ static inline void intel_pstate_set_itmt_prio(int cpu) { @@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) acpi_processor_unregister_performance(policy->cpu); } - -static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps) -{ - return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf; -} - -static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu, - struct cppc_perf_caps *caps) -{ - if (cppc_get_perf_caps(cpu->cpu, caps)) - return false; - - return caps->highest_perf && caps->lowest_perf <= caps->highest_perf; -} #else /* CONFIG_ACPI */ static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) { @@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu) } #endif /* CONFIG_ACPI_CPPC_LIB */ -static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu) -{ - pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu); - - cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling; -} - /** - * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels. + * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels. * @cpu: Target CPU. * * On hybrid processors, HWP may expose more performance levels than there are @@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu) * scaling factor between HWP performance levels and CPU frequency will be less * than the scaling factor between P-state values and CPU frequency. * - * In that case, the scaling factor between HWP performance levels and CPU - * frequency needs to be determined which can be done with the help of the - * observation that certain HWP performance levels should correspond to certain - * P-states, like for example the HWP highest performance should correspond - * to the maximum turbo P-state of the CPU. + * In that case, adjust the CPU parameters used in computations accordingly. */ -static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu) +static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu) { int perf_ctl_max_phys = cpu->pstate.max_pstate_physical; int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling; int perf_ctl_turbo = pstate_funcs.get_turbo(); int turbo_freq = perf_ctl_turbo * perf_ctl_scaling; - int perf_ctl_max = pstate_funcs.get_max(); - int max_freq = perf_ctl_max * perf_ctl_scaling; - int scaling = INT_MAX; - int freq; + int scaling = cpu->pstate.scaling; pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys); - pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max); + pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max()); pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo); pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling); - pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate); pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate); - -#ifdef CONFIG_ACPI - if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) { - struct cppc_perf_caps caps; - - if (intel_pstate_cppc_perf_caps(cpu, &caps)) { - if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) { - pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu); - - /* - * If the CPPC nominal performance is valid, it - * can be assumed to correspond to cpu_khz. - */ - if (caps.nominal_perf == perf_ctl_max_phys) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf); - } else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) { - pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu); - - /* - * If the CPPC guaranteed performance is valid, - * it can be assumed to correspond to max_freq. - */ - if (caps.guaranteed_perf == perf_ctl_max) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf); - } - } - } -#endif - /* - * If using the CPPC data to compute the HWP-to-frequency scaling factor - * doesn't work, use the HWP_CAP gauranteed perf for this purpose with - * the assumption that it corresponds to max_freq. - */ - if (scaling > perf_ctl_scaling) { - pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu); - - if (cpu->pstate.max_pstate == perf_ctl_max) { - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate); - if (scaling > perf_ctl_scaling) { - /* - * This should not happen, because it would mean that - * the number of HWP perf levels was less than the - * number of P-states, so use the PERF_CTL scaling in - * that case. - */ - pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu, - scaling); - - intel_pstate_hybrid_hwp_perf_ctl_parity(cpu); - return; - } - } + pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); /* - * If the product of the HWP performance scaling factor obtained above - * and the HWP_CAP highest performance is greater than the maximum turbo - * frequency corresponding to the pstate_funcs.get_turbo() return value, - * the scaling factor is too high, so recompute it so that the HWP_CAP - * highest performance corresponds to the maximum turbo frequency. + * If the product of the HWP performance scaling factor and the HWP_CAP + * highest performance is greater than the maximum turbo frequency + * corresponding to the pstate_funcs.get_turbo() return value, the + * scaling factor is too high, so recompute it to make the HWP_CAP + * highest performance correspond to the maximum turbo frequency. */ if (turbo_freq < cpu->pstate.turbo_pstate * scaling) { - pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling); - cpu->pstate.turbo_freq = turbo_freq; scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate); - } - - cpu->pstate.scaling = scaling; + cpu->pstate.scaling = scaling; - pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling); + pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n", + cpu->cpu, scaling); + } cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling, perf_ctl_scaling); - freq = perf_ctl_max_phys * perf_ctl_scaling; - cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling); + cpu->pstate.max_pstate_physical = + DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling, + scaling); cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling; /* @@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void) return ret; } +#ifdef CONFIG_ACPI_CPPC_LIB +static u32 hybrid_ref_perf; + +static int hybrid_get_cpu_scaling(int cpu) +{ + return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf, + intel_pstate_cppc_nominal(cpu)); +} + +static void intel_pstate_cppc_set_cpu_scaling(void) +{ + u32 min_nominal_perf = U32_MAX; + int cpu; + + for_each_present_cpu(cpu) { + u32 nominal_perf = intel_pstate_cppc_nominal(cpu); + + if (nominal_perf && nominal_perf < min_nominal_perf) + min_nominal_perf = nominal_perf; + } + + if (min_nominal_perf < U32_MAX) { + hybrid_ref_perf = min_nominal_perf; + pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling; + } +} +#else +static inline void intel_pstate_cppc_set_cpu_scaling(void) +{ +} +#endif /* CONFIG_ACPI_CPPC_LIB */ + static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); @@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { - bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU); int perf_ctl_max_phys = pstate_funcs.get_max_physical(); - int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys : - pstate_funcs.get_scaling(); + int perf_ctl_scaling = pstate_funcs.get_scaling(); cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate_physical = perf_ctl_max_phys; @@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) if (hwp_active && !hwp_mode_bdw) { __intel_pstate_get_hwp_cap(cpu); - if (hybrid_cpu) - intel_pstate_hybrid_hwp_calibrate(cpu); - else + if (pstate_funcs.get_cpu_scaling) { + cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu); + if (cpu->pstate.scaling != perf_ctl_scaling) + intel_pstate_hybrid_hwp_adjust(cpu); + } else { cpu->pstate.scaling = perf_ctl_scaling; + } } else { cpu->pstate.scaling = perf_ctl_scaling; cpu->pstate.max_pstate = pstate_funcs.get_max(); @@ -3251,11 +3205,15 @@ static int __init intel_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (no_load) - return -ENODEV; - id = x86_match_cpu(hwp_support_ids); if (id) { + bool hwp_forced = intel_pstate_hwp_is_enabled(); + + if (hwp_forced) + pr_info("HWP enabled by BIOS\n"); + else if (no_load) + return -ENODEV; + copy_cpu_funcs(&core_funcs); /* * Avoid enabling HWP for processors without EPP support, @@ -3265,8 +3223,7 @@ static int __init intel_pstate_init(void) * If HWP is enabled already, though, there is no choice but to * deal with it. */ - if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || - intel_pstate_hwp_is_enabled()) { + if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || hwp_forced) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; @@ -3276,9 +3233,16 @@ static int __init intel_pstate_init(void) if (!default_driver) default_driver = &intel_pstate; + if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) + intel_pstate_cppc_set_cpu_scaling(); + goto hwp_cpu_matched; } + pr_info("HWP not enabled\n"); } else { + if (no_load) + return -ENODEV; + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) { pr_info("CPU model not supported\n"); @@ -3357,10 +3321,9 @@ static int __init intel_pstate_setup(char *str) else if (!strcmp(str, "passive")) default_driver = &intel_cpufreq; - if (!strcmp(str, "no_hwp")) { - pr_info("HWP disabled\n"); + if (!strcmp(str, "no_hwp")) no_hwp = 1; - } + if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index 284b6bd040b1..d295f405c4bb 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -451,7 +451,6 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy) static int ve_spc_cpufreq_exit(struct cpufreq_policy *policy) { struct device *cpu_dev; - int cur_cluster = cpu_to_cluster(policy->cpu); cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 9561e3d2d428..541efe01abc7 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -42,6 +42,7 @@ config UDMABUF config DMABUF_MOVE_NOTIFY bool "Move notify between drivers (EXPERIMENTAL)" default n + depends on DMA_SHARED_BUFFER help Don't pin buffers if the dynamic DMA-buf interface is available on both the exporter as well as the importer. This fixes a security @@ -52,6 +53,7 @@ config DMABUF_MOVE_NOTIFY config DMABUF_DEBUG bool "DMA-BUF debug checks" + depends on DMA_SHARED_BUFFER default y if DMA_API_DEBUG help This option enables additional checks for DMA-BUF importers and @@ -74,7 +76,7 @@ menuconfig DMABUF_HEAPS menuconfig DMABUF_SYSFS_STATS bool "DMA-BUF sysfs statistics" - select DMA_SHARED_BUFFER + depends on DMA_SHARED_BUFFER help Choose this option to enable DMA-BUF sysfs statistics in location /sys/kernel/dmabuf/buffers. diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 715e491dfbc3..4c3fd2eed1da 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -488,9 +488,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net, struct sk_buff *skb, u16 source_node_id, bool is_broadcast, u16 ether_type) { - struct fwnet_device *dev; int status; - __be64 guid; switch (ether_type) { case ETH_P_ARP: @@ -503,7 +501,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, goto err; } - dev = netdev_priv(net); /* Write metadata, and then pass to the receive level */ skb->dev = net; skb->ip_summed = CHECKSUM_NONE; @@ -512,7 +509,6 @@ static int fwnet_finish_incoming_packet(struct net_device *net, * Parse the encapsulation header. This actually does the job of * converting to an ethernet-like pseudo frame header. */ - guid = cpu_to_be64(dev->card->guid); if (dev_hard_header(skb, net, ether_type, is_broadcast ? net->broadcast : net->dev_addr, NULL, skb->len) >= 0) { diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index ced1964faf42..2ee97bab7440 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -1147,6 +1147,64 @@ int qcom_scm_qsmmu500_wait_safe_toggle(bool en) } EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle); +bool qcom_scm_lmh_dcvsh_available(void) +{ + return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_LMH, QCOM_SCM_LMH_LIMIT_DCVSH); +} +EXPORT_SYMBOL(qcom_scm_lmh_dcvsh_available); + +int qcom_scm_lmh_profile_change(u32 profile_id) +{ + struct qcom_scm_desc desc = { + .svc = QCOM_SCM_SVC_LMH, + .cmd = QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE, + .arginfo = QCOM_SCM_ARGS(1, QCOM_SCM_VAL), + .args[0] = profile_id, + .owner = ARM_SMCCC_OWNER_SIP, + }; + + return qcom_scm_call(__scm->dev, &desc, NULL); +} +EXPORT_SYMBOL(qcom_scm_lmh_profile_change); + +int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version) +{ + dma_addr_t payload_phys; + u32 *payload_buf; + int ret, payload_size = 5 * sizeof(u32); + + struct qcom_scm_desc desc = { + .svc = QCOM_SCM_SVC_LMH, + .cmd = QCOM_SCM_LMH_LIMIT_DCVSH, + .arginfo = QCOM_SCM_ARGS(5, QCOM_SCM_RO, QCOM_SCM_VAL, QCOM_SCM_VAL, + QCOM_SCM_VAL, QCOM_SCM_VAL), + .args[1] = payload_size, + .args[2] = limit_node, + .args[3] = node_id, + .args[4] = version, + .owner = ARM_SMCCC_OWNER_SIP, + }; + + payload_buf = dma_alloc_coherent(__scm->dev, payload_size, &payload_phys, GFP_KERNEL); + if (!payload_buf) + return -ENOMEM; + + payload_buf[0] = payload_fn; + payload_buf[1] = 0; + payload_buf[2] = payload_reg; + payload_buf[3] = 1; + payload_buf[4] = payload_val; + + desc.args[0] = payload_phys; + + ret = qcom_scm_call(__scm->dev, &desc, NULL); + + dma_free_coherent(__scm->dev, payload_size, payload_buf, payload_phys); + return ret; +} +EXPORT_SYMBOL(qcom_scm_lmh_dcvsh); + static int qcom_scm_find_dload_address(struct device *dev, u64 *addr) { struct device_node *tcsr; diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h index 632fe3142462..d92156ceb3ac 100644 --- a/drivers/firmware/qcom_scm.h +++ b/drivers/firmware/qcom_scm.h @@ -114,6 +114,10 @@ extern int scm_legacy_call(struct device *dev, const struct qcom_scm_desc *desc, #define QCOM_SCM_SVC_HDCP 0x11 #define QCOM_SCM_HDCP_INVOKE 0x01 +#define QCOM_SCM_SVC_LMH 0x13 +#define QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE 0x01 +#define QCOM_SCM_LMH_LIMIT_DCVSH 0x10 + #define QCOM_SCM_SVC_SMMU_PROGRAM 0x15 #define QCOM_SCM_SMMU_CONFIG_ERRATA1 0x03 #define QCOM_SCM_SMMU_CONFIG_ERRATA1_CLIENT_ALL 0x02 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index dc3c6b3a00e5..d356e329e6f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -758,7 +758,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 8 +#define HWIP_MAX_INSTANCE 10 struct amd_powerplay { void *pp_handle; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3003ee1c9487..1d41c2c00623 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -192,6 +192,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) kgd2kfd_suspend(adev->kfd.dev, run_pm); } +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd.dev) + r = kgd2kfd_resume_iommu(adev->kfd.dev); + + return r; +} + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) { int r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ec028cf963f5..3bc52b2c604f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -137,6 +137,7 @@ int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); @@ -327,6 +328,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); +int kgd2kfd_resume_iommu(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -365,6 +367,11 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { } +static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + return 0; +} + static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 8f53837d4d3e..97178b307ed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -468,14 +468,18 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false; } -/* - * Helper function to query RAS EEPROM address - * - * @adev: amdgpu_device pointer +/** + * amdgpu_atomfirmware_ras_rom_addr -- Get the RAS EEPROM addr from VBIOS + * adev: amdgpu_device pointer + * i2c_address: pointer to u8; if not NULL, will contain + * the RAS EEPROM address if the function returns true * - * Return true if vbios supports ras rom address reporting + * Return true if VBIOS supports RAS EEPROM address reporting, + * else return false. If true and @i2c_address is not NULL, + * will contain the RAS ROM address. */ -bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address) +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, + u8 *i2c_address) { struct amdgpu_mode_info *mode_info = &adev->mode_info; int index; @@ -483,27 +487,39 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_a union firmware_info *firmware_info; u8 frev, crev; - if (i2c_address == NULL) - return false; - - *i2c_address = 0; - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - firmwareinfo); + firmwareinfo); if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, - index, &size, &frev, &crev, &data_offset)) { + index, &size, &frev, &crev, + &data_offset)) { /* support firmware_info 3.4 + */ if ((frev == 3 && crev >=4) || (frev > 3)) { firmware_info = (union firmware_info *) (mode_info->atom_context->bios + data_offset); - *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr; + /* The ras_rom_i2c_slave_addr should ideally + * be a 19-bit EEPROM address, which would be + * used as is by the driver; see top of + * amdgpu_eeprom.c. + * + * When this is the case, 0 is of course a + * valid RAS EEPROM address, in which case, + * we'll drop the first "if (firm...)" and only + * leave the check for the pointer. + * + * The reason this works right now is because + * ras_rom_i2c_slave_addr contains the EEPROM + * device type qualifier 1010b in the top 4 + * bits. + */ + if (firmware_info->v34.ras_rom_i2c_slave_addr) { + if (i2c_address) + *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr; + return true; + } } } - if (*i2c_address != 0) - return true; - return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 277128846dd1..463b9c0283f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1544,20 +1544,18 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) struct dentry *ent; int r, i; - - ent = debugfs_create_file("amdgpu_preempt_ib", 0600, root, adev, &fops_ib_preempt); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } ent = debugfs_create_file("amdgpu_force_sclk", 0200, root, adev, &fops_sclk_set); - if (!ent) { + if (IS_ERR(ent)) { DRM_ERROR("unable to create amdgpu_set_sclk debugsfs file\n"); - return -EIO; + return PTR_ERR(ent); } /* Register debugfs entries for amdgpu_ttm */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 41c6b3aacd37..ab3794c42d36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2394,6 +2394,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + goto init_failed; + r = amdgpu_device_ip_hw_init_phase1(adev); if (r) goto init_failed; @@ -3148,6 +3152,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -4601,6 +4609,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, dev_warn(tmp_adev->dev, "asic atom init failed!"); } else { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_amdkfd_resume_iommu(tmp_adev); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 8e5a7ac8c36f..7a7316731911 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -522,6 +522,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, break; case CHIP_RENOIR: case CHIP_VANGOGH: + case CHIP_YELLOW_CARP: domain |= AMDGPU_GEM_DOMAIN_GTT; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b6640291f980..f18240f87387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1181,7 +1181,12 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, @@ -1197,6 +1202,11 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, /* DIMGREY_CAVEFISH */ @@ -1204,6 +1214,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, /* Aldebaran */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index d94c5419ec25..5a6857c44bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -59,6 +59,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0; struct drm_file *file = f->private_data; struct amdgpu_device *adev = drm_to_adev(file->minor->dev); + struct amdgpu_bo *root; int ret; ret = amdgpu_file_to_fpriv(f, &fpriv); @@ -69,13 +70,19 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) dev = PCI_SLOT(adev->pdev->devfn); fn = PCI_FUNC(adev->pdev->devfn); - ret = amdgpu_bo_reserve(fpriv->vm.root.bo, false); + root = amdgpu_bo_ref(fpriv->vm.root.bo); + if (!root) + return; + + ret = amdgpu_bo_reserve(root, false); if (ret) { DRM_ERROR("Fail to reserve bo\n"); return; } amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, >t_mem, &cpu_mem); - amdgpu_bo_unreserve(fpriv->vm.root.bo); + amdgpu_bo_unreserve(root); + amdgpu_bo_unref(&root); + seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus, dev, fn, fpriv->vm.pasid); seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 14499f0de32d..8d682befe0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -552,6 +552,9 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) + drm_sched_stop(&ring->sched, NULL); + /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(&adev->ddev)) r = amdgpu_fence_wait_empty(ring); @@ -611,6 +614,11 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) { + drm_sched_resubmit_jobs(&ring->sched); + drm_sched_start(&ring->sched, true); + } + /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index cb07cc3b06ed..d6aa032890ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -341,21 +341,18 @@ retry: r = amdgpu_gem_object_create(adev, size, args->in.alignment, initial_domain, flags, ttm_bo_type_device, resv, &gobj); - if (r) { - if (r != -ERESTARTSYS) { - if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { - flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - goto retry; - } + if (r && r != -ERESTARTSYS) { + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + goto retry; + } - if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { - initial_domain |= AMDGPU_GEM_DOMAIN_GTT; - goto retry; - } - DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", - size, initial_domain, args->in.alignment, r); + if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { + initial_domain |= AMDGPU_GEM_DOMAIN_GTT; + goto retry; } - return r; + DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", + size, initial_domain, args->in.alignment, r); } if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c7797eac83c3..9ff600a38559 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -598,7 +598,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) break; default: adev->gmc.tmz_enabled = false; - dev_warn(adev->dev, + dev_info(adev->dev, "Trusted Memory Zone (TMZ) feature not supported\n"); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 543000304a1c..675a72ef305d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -118,7 +118,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res) * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: the resulting mem object * * Dummy, allocate the node but no space for it yet. */ @@ -182,7 +182,7 @@ err_out: * amdgpu_gtt_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated GTT again. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 23efdc672502..9b41cb8c3de5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -469,10 +469,10 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode (%s) ", - amdgpu_ucode_name(ucode->ucode_id)); - DRM_WARN("psp gfx command (%s) failed and response status is (0x%X)\n", - psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), + DRM_WARN("failed to load ucode %s(0x%X) ", + amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); + DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); if (!timeout) { ret = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 9dc3b2d88176..98732518543e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -114,27 +114,24 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev, static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, struct amdgpu_ras_eeprom_control *control) { - uint8_t ras_rom_i2c_slave_addr; + u8 i2c_addr; if (!control) return false; - control->i2c_address = 0; - - if (amdgpu_atomfirmware_ras_rom_addr(adev, &ras_rom_i2c_slave_addr)) - { - switch (ras_rom_i2c_slave_addr) { - case 0xA0: - control->i2c_address = 0; - return true; - case 0xA8: - control->i2c_address = 0x40000; - return true; - default: - dev_warn(adev->dev, "RAS EEPROM I2C slave address %02x not supported", - ras_rom_i2c_slave_addr); - return false; - } + if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) { + /* The address given by VBIOS is an 8-bit, wire-format + * address, i.e. the most significant byte. + * + * Normalize it to a 19-bit EEPROM address. Remove the + * device type identifier and make it a 7-bit address; + * then make it a 19-bit EEPROM address. See top of + * amdgpu_eeprom.c. + */ + i2c_addr = (i2c_addr & 0x0F) >> 1; + control->i2c_address = ((u32) i2c_addr) << 16; + + return true; } switch (adev->asic_type) { @@ -760,7 +757,7 @@ Out: return res; } -inline uint32_t amdgpu_ras_eeprom_max_record_count(void) +uint32_t amdgpu_ras_eeprom_max_record_count(void) { return RAS_MAX_RECORD_COUNT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index f95fc61b3021..6bb00578bfbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -120,7 +120,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, const u32 num); -inline uint32_t amdgpu_ras_eeprom_max_record_count(void); +uint32_t amdgpu_ras_eeprom_max_record_count(void); void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7b634a1517f9..0554576d3695 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -428,8 +428,8 @@ int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, ent = debugfs_create_file(name, S_IFREG | S_IRUGO, root, ring, &amdgpu_debugfs_ring_fops); - if (!ent) - return -ENOMEM; + if (IS_ERR(ent)) + return PTR_ERR(ent); i_size_write(ent->d_inode, ring->ring_size + 12); ring->ent = ent; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 38dade421d46..94126dc39688 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -515,6 +515,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, goto out; } + if (bo->type == ttm_bo_type_device && + new_mem->mem_type == TTM_PL_VRAM && + old_mem->mem_type != TTM_PL_VRAM) { + /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU + * accesses the BO after it's moved. + */ + abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + if (adev->mman.buffer_funcs_enabled) { if (((old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) || @@ -545,15 +554,6 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - if (bo->type == ttm_bo_type_device && - new_mem->mem_type == TTM_PL_VRAM && - old_mem->mem_type != TTM_PL_VRAM) { - /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU - * accesses the BO after it's moved. - */ - abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - } - out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 2fd77c36a1ff..7b2b0980ec41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -361,7 +361,7 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, * @man: TTM memory type manager * @tbo: TTM BO we need this range for * @place: placement flags and restrictions - * @mem: the resulting mem object + * @res: the resulting mem object * * Allocate VRAM for the given BO. */ @@ -487,7 +487,7 @@ error_sub: * amdgpu_vram_mgr_del - free ranges * * @man: TTM memory type manager - * @mem: TTM memory object + * @res: TTM memory object * * Free the allocated VRAM again. */ @@ -522,7 +522,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table * * @adev: amdgpu device pointer - * @mem: TTM memory object + * @res: TTM memory object * @offset: byte offset from the base of VRAM BO * @length: number of bytes to export in sg_table * @dev: the other device diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index ff2307d7ee0f..23b066bcffb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -258,6 +258,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) amdgpu_virt_fini_data_exchange(adev); atomic_set(&adev->in_gpu_reset, 1); + xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); + do { if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) goto flr_done; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 50572635d0f8..bd3b23171579 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -37,6 +37,7 @@ enum idh_request { IDH_REQ_GPU_RESET_ACCESS, IDH_LOG_VF_ERROR = 200, + IDH_READY_TO_RESET = 201, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index ba1d3ab869c1..f50045cebd44 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -85,11 +85,14 @@ #define mmRCC_DEV0_EPF0_STRAP0_ALDE 0x0015 #define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX 2 -#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x3878 +#define mmBIF_DOORBELL_INT_CNTL_ALDE 0x00fe #define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX 2 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT 0x18 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK 0x01000000L +#define mmBIF_INTR_CNTL_ALDE 0x0101 +#define mmBIF_INTR_CNTL_ALDE_BASE_IDX 2 + static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -440,14 +443,23 @@ static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev, */ uint32_t bif_intr_cntl; - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE); + else + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { /* set interrupt vector select bit to 0 to select * vetcor 1 for bare metal case */ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, BIF_INTR_CNTL, RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + } return 0; @@ -476,14 +488,22 @@ static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *ade */ uint32_t bif_intr_cntl; - bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (adev->asic_type == CHIP_ALDEBARAN) + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE); + else + bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL); + if (state == AMDGPU_IRQ_STATE_ENABLE) { /* set interrupt vector select bit to 0 to select * vetcor 1 for bare metal case */ bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl, BIF_INTR_CNTL, RAS_INTR_VEC_SEL, 0); - WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); + + if (adev->asic_type == CHIP_ALDEBARAN) + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl); + else + WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 42a35d9520f9..fe9a7cc8d9eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -904,14 +904,7 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_TOPAZ: - /* Disable BACO support for the specific polaris12 SKU temporarily */ - if ((adev->pdev->device == 0x699F) && - (adev->pdev->revision == 0xC7) && - (adev->pdev->subsystem_vendor == 0x1028) && - (adev->pdev->subsystem_device == 0x0039)) - return false; - else - return amdgpu_dpm_is_baco_supported(adev); + return amdgpu_dpm_is_baco_supported(adev); default: return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 16a57b70cc1a..98d1b3ab3a46 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -468,6 +468,7 @@ static const struct kfd_device_info navi10_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -487,6 +488,7 @@ static const struct kfd_device_info navi12_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -506,6 +508,7 @@ static const struct kfd_device_info navi14_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 145, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -525,6 +528,7 @@ static const struct kfd_device_info sienna_cichlid_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 4, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -544,6 +548,7 @@ static const struct kfd_device_info navy_flounder_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -562,7 +567,8 @@ static const struct kfd_device_info vangogh_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -582,6 +588,7 @@ static const struct kfd_device_info dimgrey_cavefish_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 2, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -601,6 +608,7 @@ static const struct kfd_device_info beige_goby_device_info = { .needs_iommu_device = false, .supports_cwsr = true, .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, @@ -619,7 +627,8 @@ static const struct kfd_device_info yellow_carp_device_info = { .mqd_size_aligned = MQD_SIZE_ALIGNED, .needs_iommu_device = false, .supports_cwsr = true, - .needs_pci_atomics = false, + .needs_pci_atomics = true, + .no_atomic_fw_version = 92, .num_sdma_engines = 1, .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, @@ -708,20 +717,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, if (!kfd) return NULL; - /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. - * 32 and 64-bit requests are possible and must be - * supported. - */ - kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); - if (device_info->needs_pci_atomics && - !kfd->pci_atomic_requested) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics\n", - pdev->vendor, pdev->device); - kfree(kfd); - return NULL; - } - kfd->kgd = kgd; kfd->device_info = device_info; kfd->pdev = pdev; @@ -821,6 +816,23 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd - kfd->vm_info.first_vmid_kfd + 1; + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); + if (!kfd->pci_atomic_requested && + kfd->device_info->needs_pci_atomics && + (!kfd->device_info->no_atomic_fw_version || + kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics %d<%d\n", + kfd->pdev->vendor, kfd->pdev->device, + kfd->mec_fw_version, + kfd->device_info->no_atomic_fw_version); + return false; + } + /* Verify module parameters regarding mapped process number*/ if ((hws_max_conc_proc < 0) || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { @@ -1057,17 +1069,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; } -static int kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0; err = kfd_iommu_resume(kfd); - if (err) { + if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - return err; - } + return err; +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; err = kfd->dqm->ops.start(kfd->dqm); if (err) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ab83b0de6b22..6d8f9bb2d905 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -207,6 +207,7 @@ struct kfd_device_info { bool supports_cwsr; bool needs_iommu_device; bool needs_pci_atomics; + uint32_t no_atomic_fw_version; unsigned int num_sdma_engines; unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 491373fcdb38..9fc8021bb0ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2484,7 +2484,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); - return -EFAULT; + r = -EFAULT; + goto out; } svms = &p->svms; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 816723691d51..66c799f5c7cf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -998,6 +998,8 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ uint32_t agp_base, agp_bot, agp_top; PHYSICAL_ADDRESS_LOC page_table_start, page_table_end, page_table_base; + memset(pa_config, 0, sizeof(*pa_config)); + logical_addr_low = min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18; pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); @@ -1200,7 +1202,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_hardware_init(adev->dm.dc); #if defined(CONFIG_DRM_AMD_DC_DCN) - if (adev->apu_flags) { + if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) { struct dc_phy_addr_space_config pa_config; mmhub_read_system_context(adev, &pa_config); @@ -6024,21 +6026,23 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) return 0; #if defined(CONFIG_DRM_AMD_DC_DCN) - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; + if (dm->vblank_control_workqueue) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; - INIT_WORK(&work->work, vblank_control_worker); - work->dm = dm; - work->acrtc = acrtc; - work->enable = enable; + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; - if (acrtc_state->stream) { - dc_stream_retain(acrtc_state->stream); - work->stream = acrtc_state->stream; - } + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } - queue_work(dm->vblank_control_workqueue, &work->work); + queue_work(dm->vblank_control_workqueue, &work->work); + } #endif return 0; @@ -6792,14 +6796,15 @@ const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs = { #if defined(CONFIG_DRM_AMD_DC_DCN) static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, - struct dc_state *dc_state) + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { struct dc_stream_state *stream = NULL; struct drm_connector *connector; struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j, clock, bpp; + int i, j, clock; int vcpi, pbn_div, pbn = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6838,9 +6843,15 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } pbn_div = dm_mst_get_pbn_divider(stream->link); - bpp = stream->timing.dsc_cfg.bits_per_pixel; clock = stream->timing.pix_clk_100hz / 10; - pbn = drm_dp_calc_pbn_mode(clock, bpp, true); + /* pbn is calculated by compute_mst_dsc_configs_for_state*/ + for (j = 0; j < dc_state->stream_count; j++) { + if (vars[j].aconnector == aconnector) { + pbn = vars[j].pbn; + break; + } + } + vcpi = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, pbn, pbn_div, @@ -7519,6 +7530,32 @@ static void amdgpu_dm_connector_add_common_modes(struct drm_encoder *encoder, } } +static void amdgpu_set_panel_orientation(struct drm_connector *connector) +{ + struct drm_encoder *encoder; + struct amdgpu_encoder *amdgpu_encoder; + const struct drm_display_mode *native_mode; + + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP && + connector->connector_type != DRM_MODE_CONNECTOR_LVDS) + return; + + encoder = amdgpu_dm_connector_to_encoder(connector); + if (!encoder) + return; + + amdgpu_encoder = to_amdgpu_encoder(encoder); + + native_mode = &amdgpu_encoder->native_mode; + if (native_mode->hdisplay == 0 || native_mode->vdisplay == 0) + return; + + drm_connector_set_panel_orientation_with_quirk(connector, + DRM_MODE_PANEL_ORIENTATION_UNKNOWN, + native_mode->hdisplay, + native_mode->vdisplay); +} + static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, struct edid *edid) { @@ -7547,6 +7584,8 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, * restored here. */ amdgpu_dm_update_freesync_caps(connector, edid); + + amdgpu_set_panel_orientation(connector); } else { amdgpu_dm_connector->num_modes = 0; } @@ -8058,8 +8097,26 @@ static bool is_content_protection_different(struct drm_connector_state *state, state->content_protection == DRM_MODE_CONTENT_PROTECTION_ENABLED) state->content_protection = DRM_MODE_CONTENT_PROTECTION_DESIRED; - /* Check if something is connected/enabled, otherwise we start hdcp but nothing is connected/enabled - * hot-plug, headless s3, dpms + /* Stream removed and re-enabled + * + * Can sometimes overlap with the HPD case, + * thus set update_hdcp to false to avoid + * setting HDCP multiple times. + * + * Handles: DESIRED -> DESIRED (Special case) + */ + if (!(old_state->crtc && old_state->crtc->enabled) && + state->crtc && state->crtc->enabled && + connector->state->content_protection == DRM_MODE_CONTENT_PROTECTION_DESIRED) { + dm_con_state->update_hdcp = false; + return true; + } + + /* Hot-plug, headless s3, dpms + * + * Only start HDCP if the display is connected/enabled. + * update_hdcp flag will be set to false until the next + * HPD comes in. * * Handles: DESIRED -> DESIRED (Special case) */ @@ -8648,7 +8705,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * If PSR or idle optimizations are enabled then flush out * any pending work before hardware programming. */ - flush_workqueue(dm->vblank_control_workqueue); + if (dm->vblank_control_workqueue) + flush_workqueue(dm->vblank_control_workqueue); #endif bundle->stream_update.stream = acrtc_state->stream; @@ -8983,7 +9041,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* if there mode set or reset, disable eDP PSR */ if (mode_set_reset_required) { #if defined(CONFIG_DRM_AMD_DC_DCN) - flush_workqueue(dm->vblank_control_workqueue); + if (dm->vblank_control_workqueue) + flush_workqueue(dm->vblank_control_workqueue); #endif amdgpu_dm_psr_disable_all(dm); } @@ -10243,6 +10302,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, int ret, i; bool lock_and_validation_needed = false; struct dm_crtc_state *dm_old_crtc_state; +#if defined(CONFIG_DRM_AMD_DC_DCN) + struct dsc_mst_fairness_vars vars[MAX_PIPES]; +#endif trace_amdgpu_dm_atomic_check_begin(state); @@ -10473,10 +10535,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context)) + if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) goto fail; - ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context); + ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); if (ret) goto fail; #endif @@ -10492,7 +10554,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; status = dc_validate_global_state(dc, dm_state->context, false); if (status != DC_OK) { - DC_LOG_WARNING("DC global validation failure: %s (%d)", + drm_dbg_atomic(dev, + "DC global validation failure: %s (%d)", dc_status_to_str(status), status); ret = -EINVAL; goto fail; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1bcba6943fd7..7af0d58c231b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -518,12 +518,7 @@ struct dsc_mst_fairness_params { uint32_t num_slices_h; uint32_t num_slices_v; uint32_t bpp_overwrite; -}; - -struct dsc_mst_fairness_vars { - int pbn; - bool dsc_enabled; - int bpp_x16; + struct amdgpu_dm_connector *aconnector; }; static int kbps_to_peak_pbn(int kbps) @@ -750,12 +745,12 @@ static void try_disable_dsc(struct drm_atomic_state *state, static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, struct dc_state *dc_state, - struct dc_link *dc_link) + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars) { int i; struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; - struct dsc_mst_fairness_vars vars[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; int count = 0; bool debugfs_overwrite = false; @@ -776,6 +771,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, params[count].timing = &stream->timing; params[count].sink = stream->sink; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + params[count].aconnector = aconnector; params[count].port = aconnector->port; params[count].clock_force_enable = aconnector->dsc_settings.dsc_force_enable; if (params[count].clock_force_enable == DSC_CLK_FORCE_ENABLE) @@ -798,6 +794,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, } /* Try no compression */ for (i = 0; i < count; i++) { + vars[i].aconnector = params[i].aconnector; vars[i].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i].dsc_enabled = false; vars[i].bpp_x16 = 0; @@ -851,7 +848,8 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, } bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state) + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; @@ -882,7 +880,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, return false; mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link)) { + if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars)) { mutex_unlock(&aconnector->mst_mgr.lock); return false; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b38bd68121ce..900d3f7a8498 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -39,8 +39,17 @@ void dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev); #if defined(CONFIG_DRM_AMD_DC_DCN) + +struct dsc_mst_fairness_vars { + int pbn; + bool dsc_enabled; + int bpp_x16; + struct amdgpu_dm_connector *aconnector; +}; + bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state); + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); #endif #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index c9f47d167472..b1bf80da3a55 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -62,7 +62,7 @@ inline void dc_assert_fp_enabled(void) depth = *pcpu; put_cpu_ptr(&fpu_recursion_depth); - ASSERT(depth > 1); + ASSERT(depth >= 1); } /** diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 8bd7f42a8053..1e44b13c1c7d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2586,13 +2586,21 @@ static struct abm *get_abm_from_stream_res(const struct dc_link *link) int dc_link_get_backlight_level(const struct dc_link *link) { - struct abm *abm = get_abm_from_stream_res(link); + struct panel_cntl *panel_cntl = link->panel_cntl; + struct dc *dc = link->ctx->dc; + struct dmcu *dmcu = dc->res_pool->dmcu; + bool fw_set_brightness = true; - if (abm == NULL || abm->funcs->get_current_backlight == NULL) - return DC_ERROR_UNEXPECTED; + if (dmcu) + fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); - return (int) abm->funcs->get_current_backlight(abm); + if (!fw_set_brightness && panel_cntl->funcs->get_current_backlight) + return panel_cntl->funcs->get_current_backlight(panel_cntl); + else if (abm != NULL && abm->funcs->get_current_backlight != NULL) + return (int) abm->funcs->get_current_backlight(abm); + else + return DC_ERROR_UNEXPECTED; } int dc_link_get_target_backlight_pwm(const struct dc_link *link) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cd025c12f17b..f6dbc5a74757 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1,4 +1,26 @@ -/* Copyright 2015 Advanced Micro Devices, Inc. */ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + */ #include "dm_services.h" #include "dc.h" #include "dc_link_dp.h" @@ -1561,7 +1583,7 @@ bool dc_link_dp_perform_link_training_skip_aux( struct dc_link *link, const struct dc_link_settings *link_setting) { - struct link_training_settings lt_settings; + struct link_training_settings lt_settings = {0}; dp_decide_training_settings( link, @@ -1707,7 +1729,7 @@ enum link_training_result dc_link_dp_perform_link_training( bool skip_video_pattern) { enum link_training_result status = LINK_TRAINING_SUCCESS; - struct link_training_settings lt_settings; + struct link_training_settings lt_settings = {0}; enum dp_link_encoding encoding = dp_get_link_encoding_format(link_settings); @@ -1840,9 +1862,13 @@ bool perform_link_training_with_retries( dp_disable_link_phy(link, signal); /* Abort link training if failure due to sink being unplugged. */ - if (status == LINK_TRAINING_ABORT) - break; - else if (do_fallback) { + if (status == LINK_TRAINING_ABORT) { + enum dc_connection_type type = dc_connection_none; + + dc_link_detect_sink(link, &type); + if (type == dc_connection_none) + break; + } else if (do_fallback) { decide_fallback_link_setting(*link_setting, ¤t_setting, status); /* Fail link training if reduced link bandwidth no longer meets * stream requirements. @@ -1923,7 +1949,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt( struct dc_link_settings *link_settings, struct dc_link_training_overrides *lt_overrides) { - struct link_training_settings lt_settings; + struct link_training_settings lt_settings = {0}; enum link_training_result lt_status = LINK_TRAINING_SUCCESS; enum dp_panel_mode panel_mode = DP_PANEL_MODE_DEFAULT; enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index e92339235863..e8570060d007 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -49,7 +49,6 @@ static unsigned int dce_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_cntl) { uint64_t current_backlight; - uint32_t round_result; uint32_t bl_period, bl_int_count; uint32_t bl_pwm, fractional_duty_cycle_en; uint32_t bl_period_mask, bl_pwm_mask; @@ -84,15 +83,6 @@ static unsigned int dce_get_16_bit_backlight_from_pwm(struct panel_cntl *panel_c current_backlight = div_u64(current_backlight, bl_period); current_backlight = (current_backlight + 1) >> 1; - current_backlight = (uint64_t)(current_backlight) * bl_period; - - round_result = (uint32_t)(current_backlight & 0xFFFFFFFF); - - round_result = (round_result >> (bl_int_count-1)) & 1; - - current_backlight >>= bl_int_count; - current_backlight += round_result; - return (uint32_t)(current_backlight); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index dc7823d23ba8..dd38796ba30a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -510,8 +510,12 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id vpg = dcn303_vpg_create(ctx, vpg_inst); afmt = dcn303_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) + if (!enc1 || !vpg || !afmt) { + kfree(enc1); + kfree(vpg); + kfree(afmt); return NULL; + } dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id], &se_shift, &se_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 7db268da6976..3b3721386571 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -109,7 +109,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl) union dmub_rb_cmd cmd; if (!dcn31_query_backlight_info(panel_cntl, &cmd)) - return 0; + return false; return cmd.panel_cntl.data.is_backlight_on; } @@ -119,7 +119,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl) union dmub_rb_cmd cmd; if (!dcn31_query_backlight_info(panel_cntl, &cmd)) - return 0; + return false; return cmd.panel_cntl.data.is_powered_on; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index fbed5304692d..63bbdf8b8678 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2641,7 +2641,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { if (mode_lib->vba.DRAMClockChangeWatermark > - dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) mode_lib->vba.MinTTUVBlank[k] += 25; } } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 2d55627b05b1..249cb0aeb5ae 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2005,10 +2005,10 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(power_dpm_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), - AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC), - AMDGPU_DEVICE_ATTR_RW(pp_table, ATTR_FLAG_BASIC), + AMDGPU_DEVICE_ATTR_RO(pp_num_states, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RO(pp_cur_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_force_state, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_table, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 715b4225f5ee..8156729c370b 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -1335,6 +1335,30 @@ enum smu_cmn2asic_mapping_type { #define WORKLOAD_MAP(profile, workload) \ [profile] = {1, (workload)} +/** + * smu_memcpy_trailing - Copy the end of one structure into the middle of another + * + * @dst: Pointer to destination struct + * @first_dst_member: The member name in @dst where the overwrite begins + * @last_dst_member: The member name in @dst where the overwrite ends after + * @src: Pointer to the source struct + * @first_src_member: The member name in @src where the copy begins + * + */ +#define smu_memcpy_trailing(dst, first_dst_member, last_dst_member, \ + src, first_src_member) \ +({ \ + size_t __src_offset = offsetof(typeof(*(src)), first_src_member); \ + size_t __src_size = sizeof(*(src)) - __src_offset; \ + size_t __dst_offset = offsetof(typeof(*(dst)), first_dst_member); \ + size_t __dst_size = offsetofend(typeof(*(dst)), last_dst_member) - \ + __dst_offset; \ + BUILD_BUG_ON(__src_size != __dst_size); \ + __builtin_memcpy((u8 *)(dst) + __dst_offset, \ + (u8 *)(src) + __src_offset, \ + __dst_size); \ +}) + #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4) int smu_get_power_limit(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h index 8a08ecc34c69..4884a4e1f261 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_cyan_skillfish.h @@ -33,63 +33,47 @@ #define TABLE_PMSTATUSLOG 3 // Called by Tools for Agm logging #define TABLE_DPMCLOCKS 4 // Called by Driver; defined here, but not used, for backward compatible #define TABLE_MOMENTARY_PM 5 // Called by Tools; defined here, but not used, for backward compatible -#define TABLE_COUNT 6 +#define TABLE_SMU_METRICS 6 // Called by Driver +#define TABLE_COUNT 7 -#define NUM_DSPCLK_LEVELS 8 -#define NUM_SOCCLK_DPM_LEVELS 8 -#define NUM_DCEFCLK_DPM_LEVELS 4 -#define NUM_FCLK_DPM_LEVELS 4 -#define NUM_MEMCLK_DPM_LEVELS 4 +typedef struct SmuMetricsTable_t { + //CPU status + uint16_t CoreFrequency[6]; //[MHz] + uint32_t CorePower[6]; //[mW] + uint16_t CoreTemperature[6]; //[centi-Celsius] + uint16_t L3Frequency[2]; //[MHz] + uint16_t L3Temperature[2]; //[centi-Celsius] + uint16_t C0Residency[6]; //Percentage -#define NUMBER_OF_PSTATES 8 -#define NUMBER_OF_CORES 8 + // GFX status + uint16_t GfxclkFrequency; //[MHz] + uint16_t GfxTemperature; //[centi-Celsius] -typedef enum { - S3_TYPE_ENTRY, - S5_TYPE_ENTRY, -} Sleep_Type_e; + // SOC IP info + uint16_t SocclkFrequency; //[MHz] + uint16_t VclkFrequency; //[MHz] + uint16_t DclkFrequency; //[MHz] + uint16_t MemclkFrequency; //[MHz] -typedef enum { - GFX_OFF = 0, - GFX_ON = 1, -} GFX_Mode_e; + // power, VF info for CPU/GFX telemetry rails, and then socket power total + uint32_t Voltage[2]; //[mV] indices: VDDCR_VDD, VDDCR_GFX + uint32_t Current[2]; //[mA] indices: VDDCR_VDD, VDDCR_GFX + uint32_t Power[2]; //[mW] indices: VDDCR_VDD, VDDCR_GFX + uint32_t CurrentSocketPower; //[mW] -typedef enum { - CPU_P0 = 0, - CPU_P1, - CPU_P2, - CPU_P3, - CPU_P4, - CPU_P5, - CPU_P6, - CPU_P7 -} CPU_PState_e; + uint16_t SocTemperature; //[centi-Celsius] + uint16_t EdgeTemperature; + uint16_t ThrottlerStatus; + uint16_t Spare; -typedef enum { - CPU_CORE0 = 0, - CPU_CORE1, - CPU_CORE2, - CPU_CORE3, - CPU_CORE4, - CPU_CORE5, - CPU_CORE6, - CPU_CORE7 -} CORE_ID_e; +} SmuMetricsTable_t; -typedef enum { - DF_DPM0 = 0, - DF_DPM1, - DF_DPM2, - DF_DPM3, - DF_PState_Count -} DF_PState_e; - -typedef enum { - GFX_DPM0 = 0, - GFX_DPM1, - GFX_DPM2, - GFX_DPM3, - GFX_PState_Count -} GFX_PState_e; +typedef struct SmuMetrics_t { + SmuMetricsTable_t Current; + SmuMetricsTable_t Average; + uint32_t SampleStartTime; + uint32_t SampleStopTime; + uint32_t Accnt; +} SmuMetrics_t; #endif diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 6f1b1b50d527..18b862a90fbe 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -226,7 +226,10 @@ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ __SMU_DUMMY_MAP(GfxDriverResetRecovery), \ - __SMU_DUMMY_MAP(BoardPowerCalibration), + __SMU_DUMMY_MAP(BoardPowerCalibration), \ + __SMU_DUMMY_MAP(RequestGfxclk), \ + __SMU_DUMMY_MAP(ForceGfxVid), \ + __SMU_DUMMY_MAP(UnforceGfxVid), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h index 6e6088760b18..909a86aa60f3 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_8_ppsmc.h @@ -65,6 +65,13 @@ #define PPSMC_MSG_SetDriverTableVMID 0x34 #define PPSMC_MSG_SetSoftMinCclk 0x35 #define PPSMC_MSG_SetSoftMaxCclk 0x36 -#define PPSMC_Message_Count 0x37 +#define PPSMC_MSG_GetGfxFrequency 0x37 +#define PPSMC_MSG_GetGfxVid 0x38 +#define PPSMC_MSG_ForceGfxFreq 0x39 +#define PPSMC_MSG_UnForceGfxFreq 0x3A +#define PPSMC_MSG_ForceGfxVid 0x3B +#define PPSMC_MSG_UnforceGfxVid 0x3C +#define PPSMC_MSG_GetEnabledSmuFeatures 0x3D +#define PPSMC_Message_Count 0x3E #endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 465ff8d2a01a..e7803ce8f67a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -27,6 +27,9 @@ #include <linux/pci.h> #include <linux/slab.h> #include <asm/div64.h> +#if IS_ENABLED(CONFIG_X86_64) +#include <asm/intel-family.h> +#endif #include <drm/amdgpu_drm.h> #include "ppatomctrl.h" #include "atombios.h" @@ -1733,6 +1736,17 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) return result; } +static bool intel_core_rkl_chk(void) +{ +#if IS_ENABLED(CONFIG_X86_64) + struct cpuinfo_x86 *c = &cpu_data(0); + + return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE); +#else + return false; +#endif +} + static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -1758,7 +1772,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true; data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true; - data->pcie_dpm_key_disabled = hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true; + data->pcie_dpm_key_disabled = + intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); /* need to set voltage control types before EVV patching */ data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE; data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 3ab1ce4d3419..04863a797115 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1404,7 +1404,7 @@ static int smu_disable_dpms(struct smu_context *smu) */ if (smu->uploading_custom_pp_table && (adev->asic_type >= CHIP_NAVI10) && - (adev->asic_type <= CHIP_DIMGREY_CAVEFISH)) + (adev->asic_type <= CHIP_BEIGE_GOBY)) return smu_disable_all_features_with_exception(smu, true, SMU_FEATURE_COUNT); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 273df66cac14..082f01893f3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -483,10 +483,8 @@ static int arcturus_append_powerplay_table(struct smu_context *smu) if ((smc_dpm_table->table_header.format_revision == 4) && (smc_dpm_table->table_header.content_revision == 6)) - memcpy(&smc_pptable->MaxVoltageStepGfx, - &smc_dpm_table->maxvoltagestepgfx, - sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_6, maxvoltagestepgfx)); - + smu_memcpy_trailing(smc_pptable, MaxVoltageStepGfx, BoardReserved, + smc_dpm_table, maxvoltagestepgfx); return 0; } @@ -773,8 +771,12 @@ static int arcturus_print_clk_levels(struct smu_context *smu, struct smu_11_0_dpm_context *dpm_context = NULL; uint32_t gen_speed, lane_width; - if (amdgpu_ras_intr_triggered()) - return sysfs_emit(buf, "unavailable\n"); + smu_cmn_get_sysfs_buf(&buf, &size); + + if (amdgpu_ras_intr_triggered()) { + size += sysfs_emit_at(buf, size, "unavailable\n"); + return size; + } dpm_context = smu_dpm->dpm_context; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c index b05f9541accc..3d4c65bc29dc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c @@ -44,6 +44,27 @@ #undef pr_info #undef pr_debug +/* unit: MHz */ +#define CYAN_SKILLFISH_SCLK_MIN 1000 +#define CYAN_SKILLFISH_SCLK_MAX 2000 +#define CYAN_SKILLFISH_SCLK_DEFAULT 1800 + +/* unit: mV */ +#define CYAN_SKILLFISH_VDDC_MIN 700 +#define CYAN_SKILLFISH_VDDC_MAX 1129 +#define CYAN_SKILLFISH_VDDC_MAGIC 5118 // 0x13fe + +static struct gfx_user_settings { + uint32_t sclk; + uint32_t vddc; +} cyan_skillfish_user_settings; + +#define FEATURE_MASK(feature) (1ULL << feature) +#define SMC_DPM_FEATURE ( \ + FEATURE_MASK(FEATURE_FCLK_DPM_BIT) | \ + FEATURE_MASK(FEATURE_SOC_DPM_BIT) | \ + FEATURE_MASK(FEATURE_GFX_DPM_BIT)) + static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 0), @@ -52,14 +73,473 @@ static struct cmn2asic_msg_mapping cyan_skillfish_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverTableDramAddrLow, 0), MSG_MAP(TransferTableSmu2Dram, PPSMC_MSG_TransferTableSmu2Dram, 0), MSG_MAP(TransferTableDram2Smu, PPSMC_MSG_TransferTableDram2Smu, 0), + MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 0), + MSG_MAP(RequestGfxclk, PPSMC_MSG_RequestGfxclk, 0), + MSG_MAP(ForceGfxVid, PPSMC_MSG_ForceGfxVid, 0), + MSG_MAP(UnforceGfxVid, PPSMC_MSG_UnforceGfxVid, 0), +}; + +static struct cmn2asic_mapping cyan_skillfish_table_map[SMU_TABLE_COUNT] = { + TAB_MAP_VALID(SMU_METRICS), }; +static int cyan_skillfish_tables_init(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *tables = smu_table->tables; + + SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, + sizeof(SmuMetrics_t), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM); + + smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + if (!smu_table->metrics_table) + goto err0_out; + + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); + if (!smu_table->gpu_metrics_table) + goto err1_out; + + smu_table->metrics_time = 0; + + return 0; + +err1_out: + smu_table->gpu_metrics_table_size = 0; + kfree(smu_table->metrics_table); +err0_out: + return -ENOMEM; +} + +static int cyan_skillfish_init_smc_tables(struct smu_context *smu) +{ + int ret = 0; + + ret = cyan_skillfish_tables_init(smu); + if (ret) + return ret; + + return smu_v11_0_init_smc_tables(smu); +} + +static int cyan_skillfish_finit_smc_tables(struct smu_context *smu) +{ + struct smu_table_context *smu_table = &smu->smu_table; + + kfree(smu_table->metrics_table); + smu_table->metrics_table = NULL; + + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + smu_table->gpu_metrics_table_size = 0; + + smu_table->metrics_time = 0; + + return 0; +} + +static int +cyan_skillfish_get_smu_metrics_data(struct smu_context *smu, + MetricsMember_t member, + uint32_t *value) +{ + struct smu_table_context *smu_table = &smu->smu_table; + SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; + int ret = 0; + + mutex_lock(&smu->metrics_lock); + + ret = smu_cmn_get_metrics_table_locked(smu, NULL, false); + if (ret) { + mutex_unlock(&smu->metrics_lock); + return ret; + } + + switch (member) { + case METRICS_CURR_GFXCLK: + *value = metrics->Current.GfxclkFrequency; + break; + case METRICS_CURR_SOCCLK: + *value = metrics->Current.SocclkFrequency; + break; + case METRICS_CURR_VCLK: + *value = metrics->Current.VclkFrequency; + break; + case METRICS_CURR_DCLK: + *value = metrics->Current.DclkFrequency; + break; + case METRICS_CURR_UCLK: + *value = metrics->Current.MemclkFrequency; + break; + case METRICS_AVERAGE_SOCKETPOWER: + *value = (metrics->Current.CurrentSocketPower << 8) / + 1000; + break; + case METRICS_TEMPERATURE_EDGE: + *value = metrics->Current.GfxTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_TEMPERATURE_HOTSPOT: + *value = metrics->Current.SocTemperature / 100 * + SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case METRICS_VOLTAGE_VDDSOC: + *value = metrics->Current.Voltage[0]; + break; + case METRICS_VOLTAGE_VDDGFX: + *value = metrics->Current.Voltage[1]; + break; + case METRICS_THROTTLER_STATUS: + *value = metrics->Current.ThrottlerStatus; + break; + default: + *value = UINT_MAX; + break; + } + + mutex_unlock(&smu->metrics_lock); + + return ret; +} + +static int cyan_skillfish_read_sensor(struct smu_context *smu, + enum amd_pp_sensors sensor, + void *data, + uint32_t *size) +{ + int ret = 0; + + if (!data || !size) + return -EINVAL; + + mutex_lock(&smu->sensor_lock); + + switch (sensor) { + case AMDGPU_PP_SENSOR_GFX_SCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_CURR_GFXCLK, + (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GFX_MCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_CURR_UCLK, + (uint32_t *)data); + *(uint32_t *)data *= 100; + *size = 4; + break; + case AMDGPU_PP_SENSOR_GPU_POWER: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_AVERAGE_SOCKETPOWER, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_TEMPERATURE_HOTSPOT, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_TEMPERATURE_EDGE, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDNB: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_VOLTAGE_VDDSOC, + (uint32_t *)data); + *size = 4; + break; + case AMDGPU_PP_SENSOR_VDDGFX: + ret = cyan_skillfish_get_smu_metrics_data(smu, + METRICS_VOLTAGE_VDDGFX, + (uint32_t *)data); + *size = 4; + break; + default: + ret = -EOPNOTSUPP; + break; + } + + mutex_unlock(&smu->sensor_lock); + + return ret; +} + +static int cyan_skillfish_get_current_clk_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *value) +{ + MetricsMember_t member_type; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + member_type = METRICS_CURR_GFXCLK; + break; + case SMU_FCLK: + case SMU_MCLK: + member_type = METRICS_CURR_UCLK; + break; + case SMU_SOCCLK: + member_type = METRICS_CURR_SOCCLK; + break; + case SMU_VCLK: + member_type = METRICS_CURR_VCLK; + break; + case SMU_DCLK: + member_type = METRICS_CURR_DCLK; + break; + default: + return -EINVAL; + } + + return cyan_skillfish_get_smu_metrics_data(smu, member_type, value); +} + +static int cyan_skillfish_print_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, + char *buf) +{ + int ret = 0, size = 0; + uint32_t cur_value = 0; + + smu_cmn_get_sysfs_buf(&buf, &size); + + switch (clk_type) { + case SMU_OD_SCLK: + ret = cyan_skillfish_get_smu_metrics_data(smu, METRICS_CURR_GFXCLK, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size,"%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value); + break; + case SMU_OD_VDDC_CURVE: + ret = cyan_skillfish_get_smu_metrics_data(smu, METRICS_VOLTAGE_VDDGFX, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size,"%s:\n", "OD_VDDC"); + size += sysfs_emit_at(buf, size, "0: %umV *\n", cur_value); + break; + case SMU_OD_RANGE: + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + size += sysfs_emit_at(buf, size, "VDDC: %7umV %10umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + break; + case SMU_GFXCLK: + case SMU_SCLK: + case SMU_FCLK: + case SMU_MCLK: + case SMU_SOCCLK: + case SMU_VCLK: + case SMU_DCLK: + ret = cyan_skillfish_get_current_clk_freq(smu, clk_type, &cur_value); + if (ret) + return ret; + size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value); + break; + default: + dev_warn(smu->adev->dev, "Unsupported clock type\n"); + return ret; + } + + return size; +} + +static bool cyan_skillfish_is_dpm_running(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + uint32_t feature_mask[2]; + uint64_t feature_enabled; + + /* we need to re-init after suspend so return false */ + if (adev->in_suspend) + return false; + + ret = smu_cmn_get_enabled_32_bits_mask(smu, feature_mask, 2); + + if (ret) + return false; + + feature_enabled = (uint64_t)feature_mask[0] | + ((uint64_t)feature_mask[1] << 32); + + return !!(feature_enabled & SMC_DPM_FEATURE); +} + +static ssize_t cyan_skillfish_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v2_2 *gpu_metrics = + (struct gpu_metrics_v2_2 *)smu_table->gpu_metrics_table; + SmuMetrics_t metrics; + int i, ret = 0; + + ret = smu_cmn_get_metrics_table(smu, &metrics, true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 2, 2); + + gpu_metrics->temperature_gfx = metrics.Current.GfxTemperature; + gpu_metrics->temperature_soc = metrics.Current.SocTemperature; + + gpu_metrics->average_socket_power = metrics.Current.CurrentSocketPower; + gpu_metrics->average_soc_power = metrics.Current.Power[0]; + gpu_metrics->average_gfx_power = metrics.Current.Power[1]; + + gpu_metrics->average_gfxclk_frequency = metrics.Average.GfxclkFrequency; + gpu_metrics->average_socclk_frequency = metrics.Average.SocclkFrequency; + gpu_metrics->average_uclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_fclk_frequency = metrics.Average.MemclkFrequency; + gpu_metrics->average_vclk_frequency = metrics.Average.VclkFrequency; + gpu_metrics->average_dclk_frequency = metrics.Average.DclkFrequency; + + gpu_metrics->current_gfxclk = metrics.Current.GfxclkFrequency; + gpu_metrics->current_socclk = metrics.Current.SocclkFrequency; + gpu_metrics->current_uclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_fclk = metrics.Current.MemclkFrequency; + gpu_metrics->current_vclk = metrics.Current.VclkFrequency; + gpu_metrics->current_dclk = metrics.Current.DclkFrequency; + + for (i = 0; i < 6; i++) { + gpu_metrics->temperature_core[i] = metrics.Current.CoreTemperature[i]; + gpu_metrics->average_core_power[i] = metrics.Average.CorePower[i]; + gpu_metrics->current_coreclk[i] = metrics.Current.CoreFrequency[i]; + } + + for (i = 0; i < 2; i++) { + gpu_metrics->temperature_l3[i] = metrics.Current.L3Temperature[i]; + gpu_metrics->current_l3clk[i] = metrics.Current.L3Frequency[i]; + } + + gpu_metrics->throttle_status = metrics.Current.ThrottlerStatus; + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v2_2); +} + +static int cyan_skillfish_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], uint32_t size) +{ + int ret = 0; + uint32_t vid; + + switch (type) { + case PP_OD_EDIT_VDDC_CURVE: + if (size != 3 || input[0] != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + if (input[1] <= CYAN_SKILLFISH_SCLK_MIN || + input[1] > CYAN_SKILLFISH_SCLK_MAX) { + dev_err(smu->adev->dev, "Invalid sclk! Valid sclk range: %uMHz - %uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + return -EINVAL; + } + + if (input[2] <= CYAN_SKILLFISH_VDDC_MIN || + input[2] > CYAN_SKILLFISH_VDDC_MAX) { + dev_err(smu->adev->dev, "Invalid vddc! Valid vddc range: %umV - %umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + return -EINVAL; + } + + cyan_skillfish_user_settings.sclk = input[1]; + cyan_skillfish_user_settings.vddc = input[2]; + + break; + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + cyan_skillfish_user_settings.sclk = CYAN_SKILLFISH_SCLK_DEFAULT; + cyan_skillfish_user_settings.vddc = CYAN_SKILLFISH_VDDC_MAGIC; + + break; + case PP_OD_COMMIT_DPM_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Invalid parameter!\n"); + return -EINVAL; + } + + if (cyan_skillfish_user_settings.sclk < CYAN_SKILLFISH_SCLK_MIN || + cyan_skillfish_user_settings.sclk > CYAN_SKILLFISH_SCLK_MAX) { + dev_err(smu->adev->dev, "Invalid sclk! Valid sclk range: %uMHz - %uMhz\n", + CYAN_SKILLFISH_SCLK_MIN, CYAN_SKILLFISH_SCLK_MAX); + return -EINVAL; + } + + if ((cyan_skillfish_user_settings.vddc != CYAN_SKILLFISH_VDDC_MAGIC) && + (cyan_skillfish_user_settings.vddc < CYAN_SKILLFISH_VDDC_MIN || + cyan_skillfish_user_settings.vddc > CYAN_SKILLFISH_VDDC_MAX)) { + dev_err(smu->adev->dev, "Invalid vddc! Valid vddc range: %umV - %umV\n", + CYAN_SKILLFISH_VDDC_MIN, CYAN_SKILLFISH_VDDC_MAX); + return -EINVAL; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RequestGfxclk, + cyan_skillfish_user_settings.sclk, NULL); + if (ret) { + dev_err(smu->adev->dev, "Set sclk failed!\n"); + return ret; + } + + if (cyan_skillfish_user_settings.vddc == CYAN_SKILLFISH_VDDC_MAGIC) { + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_UnforceGfxVid, NULL); + if (ret) { + dev_err(smu->adev->dev, "Unforce vddc failed!\n"); + return ret; + } + } else { + /* + * PMFW accepts SVI2 VID code, convert voltage to VID: + * vid = (uint32_t)((1.55 - voltage) * 160.0 + 0.00001) + */ + vid = (1550 - cyan_skillfish_user_settings.vddc) * 160 / 1000; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ForceGfxVid, vid, NULL); + if (ret) { + dev_err(smu->adev->dev, "Force vddc failed!\n"); + return ret; + } + } + + break; + default: + return -EOPNOTSUPP; + } + + return ret; +} + static const struct pptable_funcs cyan_skillfish_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, .check_fw_version = smu_v11_0_check_fw_version, .init_power = smu_v11_0_init_power, .fini_power = smu_v11_0_fini_power, + .init_smc_tables = cyan_skillfish_init_smc_tables, + .fini_smc_tables = cyan_skillfish_finit_smc_tables, + .read_sensor = cyan_skillfish_read_sensor, + .print_clk_levels = cyan_skillfish_print_clk_levels, + .is_dpm_running = cyan_skillfish_is_dpm_running, + .get_gpu_metrics = cyan_skillfish_get_gpu_metrics, + .od_edit_dpm_table = cyan_skillfish_od_edit_dpm_table, .register_irq_handler = smu_v11_0_register_irq_handler, .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location, .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param, @@ -72,5 +552,6 @@ void cyan_skillfish_set_ppt_funcs(struct smu_context *smu) { smu->ppt_funcs = &cyan_skillfish_ppt_funcs; smu->message_map = cyan_skillfish_message_map; + smu->table_map = cyan_skillfish_table_map; smu->is_apu = true; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index f96681700c41..b1ad451af06b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -431,16 +431,16 @@ static int navi10_append_powerplay_table(struct smu_context *smu) switch (smc_dpm_table->table_header.content_revision) { case 5: /* nv10 and nv14 */ - memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers, - sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header)); + smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved, + smc_dpm_table, I2cControllers); break; case 7: /* nv12 */ ret = amdgpu_atombios_get_data_table(adev, index, NULL, NULL, NULL, (uint8_t **)&smc_dpm_table_v4_7); if (ret) return ret; - memcpy(smc_pptable->I2cControllers, smc_dpm_table_v4_7->I2cControllers, - sizeof(*smc_dpm_table_v4_7) - sizeof(smc_dpm_table_v4_7->table_header)); + smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved, + smc_dpm_table_v4_7, I2cControllers); break; default: dev_err(smu->adev->dev, "smc_dpm_info with unsupported content revision %d!\n", @@ -1279,6 +1279,8 @@ static int navi10_print_clk_levels(struct smu_context *smu, struct smu_11_0_overdrive_table *od_settings = smu->od_settings; uint32_t min_value, max_value; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: @@ -1392,7 +1394,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_RANGE: if (!smu->od_enabled || !od_table || !od_settings) break; - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, @@ -2272,7 +2274,27 @@ static int navi10_baco_enter(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) + /* + * This aims the case below: + * amdgpu driver loaded -> runpm suspend kicked -> sound driver loaded + * + * For NAVI10 and later ASICs, we rely on PMFW to handle the runpm. To + * make that possible, PMFW needs to acknowledge the dstate transition + * process for both gfx(function 0) and audio(function 1) function of + * the ASIC. + * + * The PCI device's initial runpm status is RUNPM_SUSPENDED. So as the + * device representing the audio function of the ASIC. And that means + * even if the sound driver(snd_hda_intel) was not loaded yet, it's still + * possible runpm suspend kicked on the ASIC. However without the dstate + * transition notification from audio function, pmfw cannot handle the + * BACO in/exit correctly. And that will cause driver hang on runpm + * resuming. + * + * To address this, we revert to legacy message way(driver masters the + * timing for BACO in/exit) on sound driver missing. + */ + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO); else return smu_v11_0_baco_enter(smu); @@ -2282,7 +2304,7 @@ static int navi10_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) { + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ msleep(10); return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 5e292c3f5050..ca57221e3962 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1058,6 +1058,8 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, uint32_t min_value, max_value; uint32_t smu_version; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: @@ -1180,7 +1182,7 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, if (!smu->od_enabled || !od_table || !od_settings) break; - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); if (sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) { sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMIN, @@ -2187,7 +2189,7 @@ static int sienna_cichlid_baco_enter(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_BACO); else return smu_v11_0_baco_enter(smu); @@ -2197,7 +2199,7 @@ static int sienna_cichlid_baco_exit(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->in_runpm) { + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { /* Wait for PMFW handling for the Dstate change */ msleep(10); return smu_v11_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 6eb50b05a33c..f6ef0ce6e9e2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -589,10 +589,12 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -601,7 +603,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -610,7 +612,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", @@ -688,10 +690,12 @@ static int vangogh_print_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -700,7 +704,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, break; case SMU_OD_CCLK: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); + size += sysfs_emit_at(buf, size, "CCLK_RANGE in Core%d:\n", smu->cpu_core_id_select); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->cpu_actual_soft_min_freq > 0) ? smu->cpu_actual_soft_min_freq : smu->cpu_default_soft_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", @@ -709,7 +713,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, break; case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); size += sysfs_emit_at(buf, size, "CCLK: %7uMhz %10uMhz\n", @@ -1869,7 +1873,7 @@ static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TAB } else { if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) { dev_err(smu->adev->dev, - "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", smu->gfx_actual_hard_min_freq, smu->gfx_actual_soft_max_freq); return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index b39138041141..145f13b8c977 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -426,7 +426,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu, } else { if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) { dev_err(smu->adev->dev, - "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", smu->gfx_actual_hard_min_freq, smu->gfx_actual_soft_max_freq); return -EINVAL; @@ -497,6 +497,8 @@ static int renoir_print_clk_levels(struct smu_context *smu, if (ret) return ret; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_RANGE: if (smu_dpm_ctx->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index ec8c30daf31c..5019903db492 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -409,9 +409,8 @@ static int aldebaran_append_powerplay_table(struct smu_context *smu) if ((smc_dpm_table->table_header.format_revision == 4) && (smc_dpm_table->table_header.content_revision == 10)) - memcpy(&smc_pptable->GfxMaxCurrent, - &smc_dpm_table->GfxMaxCurrent, - sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_10, GfxMaxCurrent)); + smu_memcpy_trailing(smc_pptable, GfxMaxCurrent, reserved, + smc_dpm_table, GfxMaxCurrent); return 0; } @@ -734,15 +733,19 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, uint32_t freq_values[3] = {0}; uint32_t min_clk, max_clk; - if (amdgpu_ras_intr_triggered()) - return sysfs_emit(buf, "unavailable\n"); + smu_cmn_get_sysfs_buf(&buf, &size); + + if (amdgpu_ras_intr_triggered()) { + size += sysfs_emit_at(buf, size, "unavailable\n"); + return size; + } dpm_context = smu_dpm->dpm_context; switch (type) { case SMU_OD_SCLK: - size = sysfs_emit(buf, "%s:\n", "GFXCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "GFXCLK"); fallthrough; case SMU_SCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_GFXCLK, &now); @@ -796,7 +799,7 @@ static int aldebaran_print_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: - size = sysfs_emit(buf, "%s:\n", "MCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "MCLK"); fallthrough; case SMU_MCLK: ret = aldebaran_get_current_clk_freq_by_table(smu, SMU_UCLK, &now); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 0f17c2522c85..a403657151ba 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -731,7 +731,7 @@ static int yellow_carp_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM } else { if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) { dev_err(smu->adev->dev, - "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", smu->gfx_actual_hard_min_freq, smu->gfx_actual_soft_max_freq); return -EINVAL; @@ -1052,16 +1052,18 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; + smu_cmn_get_sysfs_buf(&buf, &size); + switch (clk_type) { case SMU_OD_SCLK: - size = sysfs_emit(buf, "%s:\n", "OD_SCLK"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); size += sysfs_emit_at(buf, size, "0: %10uMhz\n", (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq); size += sysfs_emit_at(buf, size, "1: %10uMhz\n", (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq); break; case SMU_OD_RANGE: - size = sysfs_emit(buf, "%s:\n", "OD_RANGE"); + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n", smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 66711ab24c15..843d2cbfc71d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1053,3 +1053,24 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } + +bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) +{ + struct pci_dev *p = NULL; + bool snd_driver_loaded; + + /* + * If the ASIC comes with no audio function, we always assume + * it is "enabled". + */ + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, 1); + if (!p) + return true; + + snd_driver_loaded = pci_is_enabled(p) ? true : false; + + pci_dev_put(p); + + return snd_driver_loaded; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 16993daa2ae0..beea03810bca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -110,5 +110,20 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); +/* + * Helper function to make sysfs_emit_at() happy. Align buf to + * the current page boundary and record the offset. + */ +static inline void smu_cmn_get_sysfs_buf(char **buf, int *offset) +{ + if (!*buf || !offset) + return; + + *offset = offset_in_page(*buf); + *buf -= *offset; +} + +bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev); + #endif #endif diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index 76d38561c910..cf741c5c82d2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -397,8 +397,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, if (switch_mmu_context) { struct etnaviv_iommu_context *old_context = gpu->mmu_context; - etnaviv_iommu_context_get(mmu_context); - gpu->mmu_context = mmu_context; + gpu->mmu_context = etnaviv_iommu_context_get(mmu_context); etnaviv_iommu_context_put(old_context); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8f1b5af47dd6..f0b2540e60e4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -294,8 +294,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get( list_del(&mapping->obj_node); } - etnaviv_iommu_context_get(mmu_context); - mapping->context = mmu_context; + mapping->context = etnaviv_iommu_context_get(mmu_context); mapping->use = 1; ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 4dd7d9d541c0..486259e154af 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -532,8 +532,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, goto err_submit_objects; submit->ctx = file->driver_priv; - etnaviv_iommu_context_get(submit->ctx->mmu); - submit->mmu_context = submit->ctx->mmu; + submit->mmu_context = etnaviv_iommu_context_get(submit->ctx->mmu); submit->exec_state = args->exec_state; submit->flags = args->flags; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index c297fffe06eb..cc5b07f86346 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -569,6 +569,12 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu) /* We rely on the GPU running, so program the clock */ etnaviv_gpu_update_clock(gpu); + gpu->fe_running = false; + gpu->exec_state = -1; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = NULL; + return 0; } @@ -637,19 +643,23 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch) VIVS_MMUv2_SEC_COMMAND_CONTROL_ENABLE | VIVS_MMUv2_SEC_COMMAND_CONTROL_PREFETCH(prefetch)); } + + gpu->fe_running = true; } -static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu) +static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu, + struct etnaviv_iommu_context *context) { - u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, - &gpu->mmu_context->cmdbuf_mapping); u16 prefetch; + u32 address; /* setup the MMU */ - etnaviv_iommu_restore(gpu, gpu->mmu_context); + etnaviv_iommu_restore(gpu, context); /* Start command processor */ prefetch = etnaviv_buffer_init(gpu); + address = etnaviv_cmdbuf_get_va(&gpu->buffer, + &gpu->mmu_context->cmdbuf_mapping); etnaviv_gpu_start_fe(gpu, address, prefetch); } @@ -832,7 +842,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) /* Now program the hardware */ mutex_lock(&gpu->lock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); @@ -1057,8 +1066,6 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) spin_unlock(&gpu->event_spinlock); etnaviv_gpu_hw_init(gpu); - gpu->exec_state = -1; - gpu->mmu_context = NULL; mutex_unlock(&gpu->lock); pm_runtime_mark_last_busy(gpu->dev); @@ -1370,14 +1377,12 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit) goto out_unlock; } - if (!gpu->mmu_context) { - etnaviv_iommu_context_get(submit->mmu_context); - gpu->mmu_context = submit->mmu_context; - etnaviv_gpu_start_fe_idleloop(gpu); - } else { - etnaviv_iommu_context_get(gpu->mmu_context); - submit->prev_mmu_context = gpu->mmu_context; - } + if (!gpu->fe_running) + etnaviv_gpu_start_fe_idleloop(gpu, submit->mmu_context); + + if (submit->prev_mmu_context) + etnaviv_iommu_context_put(submit->prev_mmu_context); + submit->prev_mmu_context = etnaviv_iommu_context_get(gpu->mmu_context); if (submit->nr_pmrs) { gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; @@ -1579,7 +1584,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms) static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) { - if (gpu->initialized && gpu->mmu_context) { + if (gpu->initialized && gpu->fe_running) { /* Replace the last WAIT with END */ mutex_lock(&gpu->lock); etnaviv_buffer_end(gpu); @@ -1592,8 +1597,7 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) */ etnaviv_gpu_wait_idle(gpu, 100); - etnaviv_iommu_context_put(gpu->mmu_context); - gpu->mmu_context = NULL; + gpu->fe_running = false; } gpu->exec_state = -1; @@ -1741,6 +1745,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, etnaviv_gpu_hw_suspend(gpu); #endif + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + if (gpu->initialized) { etnaviv_cmdbuf_free(&gpu->buffer); etnaviv_iommu_global_fini(gpu); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 8ea48697d132..1c75c8ed5bce 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -101,6 +101,7 @@ struct etnaviv_gpu { struct workqueue_struct *wq; struct drm_gpu_scheduler sched; bool initialized; + bool fe_running; /* 'ring'-buffer: */ struct etnaviv_cmdbuf buffer; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 1a7c89a67bea..afe5dd6a9925 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -92,6 +92,10 @@ static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu, struct etnaviv_iommuv1_context *v1_context = to_v1_context(context); u32 pgtable; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + /* set base addresses */ gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base); gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index f8bf488e9d71..d664ae29ae20 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -172,6 +172,10 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_CONTROL) & VIVS_MMUv2_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + prefetch = etnaviv_buffer_config_mmuv2(gpu, (u32)v2_context->mtlb_dma, (u32)context->global->bad_page_dma); @@ -192,6 +196,10 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu, if (gpu_read(gpu, VIVS_MMUv2_SEC_CONTROL) & VIVS_MMUv2_SEC_CONTROL_ENABLE) return; + if (gpu->mmu_context) + etnaviv_iommu_context_put(gpu->mmu_context); + gpu->mmu_context = etnaviv_iommu_context_get(context); + gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_LOW, lower_32_bits(context->global->v2.pta_dma)); gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_HIGH, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index dab1b58006d8..9fb1a2aadbcb 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -199,6 +199,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context, */ list_for_each_entry_safe(m, n, &list, scan_node) { etnaviv_iommu_remove_mapping(context, m); + etnaviv_iommu_context_put(m->context); m->context = NULL; list_del_init(&m->mmu_node); list_del_init(&m->scan_node); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index d1d6902fd13b..e4a0b7d09c2e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -105,9 +105,11 @@ void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf); struct etnaviv_iommu_context * etnaviv_iommu_context_init(struct etnaviv_iommu_global *global, struct etnaviv_cmdbuf_suballoc *suballoc); -static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) +static inline struct etnaviv_iommu_context * +etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx) { kref_get(&ctx->refcount); + return ctx; } void etnaviv_iommu_context_put(struct etnaviv_iommu_context *ctx); void etnaviv_iommu_restore(struct etnaviv_gpu *gpu, diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 642a5b5a1b81..335ba9f43d8f 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,7 +19,6 @@ subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) # clang warnings subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 04175f359fd6..abe3d61b6243 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2445,11 +2445,14 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) */ if (drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == - sizeof(intel_dp->edp_dpcd)) + sizeof(intel_dp->edp_dpcd)) { drm_dbg_kms(&dev_priv->drm, "eDP DPCD: %*ph\n", (int)sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); + intel_dp->use_max_params = intel_dp->edp_dpcd[0] < DP_EDP_14; + } + /* * This has to be called after intel_dp->edp_dpcd is filled, PSR checks * for SET_POWER_CAPABLE bit in intel_dp->edp_dpcd[1] diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 053a3c2f7267..508a514c5e37 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -848,7 +848,7 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp, } if (ret) - intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); + ret = intel_dp_link_train_phy(intel_dp, crtc_state, DP_PHY_DPRX); if (intel_dp->set_idle_link_train) intel_dp->set_idle_link_train(intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..9ccf4b29b82e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,6 +986,9 @@ void i915_gem_context_release(struct kref *ref) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1205,9 +1208,6 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); - if (ctx->syncobj) - drm_syncobj_put(ctx->syncobj); - ctx->file_priv = ERR_PTR(-EBADF); /* diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index ffae7df5e4d7..4a6bb64c3a35 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -59,13 +59,13 @@ static int igt_dmabuf_import_self(void *arg) err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import != &obj->base) { pr_err("i915_gem_prime_import created a new object!\n"); err = -EINVAL; goto out_import; } - import_obj = to_intel_bo(import); i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); @@ -128,6 +128,8 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", PTR_ERR(import)); err = PTR_ERR(import); + } else { + err = 0; } dma_buf_put(dmabuf); @@ -176,6 +178,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import == &obj->base) { pr_err("i915_gem_prime_import reused gem object!\n"); @@ -183,8 +186,6 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_import; } - import_obj = to_intel_bo(import); - i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); if (err) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b20f5621f62b..a2c34e5a1c54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,6 +581,20 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) return I915_MMAP_TYPE_GTT; } +static struct drm_i915_gem_object * +create_sys_or_internal(struct drm_i915_private *i915, + unsigned long size) +{ + if (HAS_LMEM(i915)) { + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; + + return __i915_gem_object_create_user(i915, size, &sys_region, 1); + } + + return i915_gem_object_create_internal(i915, size); +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) @@ -589,7 +603,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = i915_gem_object_create_internal(i915, size); + obj = create_sys_or_internal(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -633,6 +647,7 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm_node *hole, *next; int loop, err = 0; u64 offset; + int enospc = HAS_LMEM(i915) ? -ENXIO : -ENOSPC; /* Disable background reaper */ disable_retire_worker(i915); @@ -683,14 +698,14 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Too large */ - if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); err = -EINVAL; goto out; } /* Fill the hole, further allocation attempts should then fail */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = create_sys_or_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); @@ -703,7 +718,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto err_obj; } - if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); err = -EINVAL; goto err_obj; @@ -839,10 +854,9 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; - if (HAS_LMEM(i915)) + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) return false; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index 51dbe0e3294e..d2969f68dd64 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -6,7 +6,7 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H -#include <stddef.h> +#include <linux/stddef.h> struct intel_engine_cs; struct intel_gt; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d812b27835f8..591a5224287e 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1973,8 +1973,14 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; - return intel_uncore_read(uncore, GEN6_RPNSWREQ); + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); + + return freq; } static u32 intel_rps_get_req(u32 pureq) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index b104fb7607eb..86c318516e14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -172,11 +172,6 @@ void intel_uc_driver_remove(struct intel_uc *uc) __uc_free_load_err_log(uc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return intel_guc_ct_enabled(&guc->ct); -} - /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -210,7 +205,7 @@ static void guc_get_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc) { /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(!guc_communication_enabled(guc)); + GEM_BUG_ON(!intel_guc_ct_enabled(&guc->ct)); spin_lock_irq(&guc->irq_lock); if (guc->mmio_msg) { @@ -226,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) struct drm_i915_private *i915 = gt->i915; int ret; - GEM_BUG_ON(guc_communication_enabled(guc)); + GEM_BUG_ON(intel_guc_ct_enabled(&guc->ct)); ret = i915_inject_probe_error(i915, -ENXIO); if (ret) @@ -662,7 +657,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) return 0; /* Make sure we enable communication if and only if it's disabled */ - GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct)); if (enable_communication) guc_enable_communication(guc); diff --git a/drivers/gpu/drm/mgag200/mgag200_pll.c b/drivers/gpu/drm/mgag200/mgag200_pll.c index 7c903cf19c0d..e9ae22b4f813 100644 --- a/drivers/gpu/drm/mgag200/mgag200_pll.c +++ b/drivers/gpu/drm/mgag200/mgag200_pll.c @@ -124,6 +124,7 @@ static int mgag200_pixpll_compute_g200se_00(struct mgag200_pll *pixpll, long clo unsigned int computed; m = n = p = s = 0; + delta = 0xffffffff; permitteddelta = clock * 5 / 1000; for (testp = 8; testp > 0; testp /= 2) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c index b0ece71aefde..ce774579c89d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/ctrl.c @@ -57,7 +57,7 @@ nvkm_control_mthd_pstate_info(struct nvkm_control *ctrl, void *data, u32 size) args->v0.count = 0; args->v0.ustate_ac = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; args->v0.ustate_dc = NVIF_CONTROL_PSTATE_INFO_V0_USTATE_DISABLE; - args->v0.pwrsrc = -ENOSYS; + args->v0.pwrsrc = -ENODEV; args->v0.pstate = NVIF_CONTROL_PSTATE_INFO_V0_PSTATE_UNKNOWN; } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 0da5b3100ab1..dfe5f1d29763 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -58,25 +58,16 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd) } static void lock_region(struct panfrost_device *pfdev, u32 as_nr, - u64 iova, size_t size) + u64 iova, u64 size) { u8 region_width; u64 region = iova & PAGE_MASK; - /* - * fls returns: - * 1 .. 32 - * - * 10 + fls(num_pages) - * results in the range (11 .. 42) - */ - - size = round_up(size, PAGE_SIZE); - region_width = 10 + fls(size >> PAGE_SHIFT); - if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) { - /* not pow2, so must go up to the next pow2 */ - region_width += 1; - } + /* The size is encoded as ceil(log2) minus(1), which may be calculated + * with fls. The size must be clamped to hardware bounds. + */ + size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE); + region_width = fls64(size - 1) - 1; region |= region_width; /* Lock the region that needs to be updated */ @@ -87,7 +78,7 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr, static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, - u64 iova, size_t size, u32 op) + u64 iova, u64 size, u32 op) { if (as_nr < 0) return 0; @@ -104,7 +95,7 @@ static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr, static int mmu_hw_do_operation(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, - u64 iova, size_t size, u32 op) + u64 iova, u64 size, u32 op) { int ret; @@ -121,7 +112,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m u64 transtab = cfg->arm_mali_lpae_cfg.transtab; u64 memattr = cfg->arm_mali_lpae_cfg.memattr; - mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM); + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL); mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32); @@ -137,7 +128,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr) { - mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM); + mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0); mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0); @@ -251,7 +242,7 @@ static size_t get_pgsize(u64 addr, size_t size) static void panfrost_mmu_flush_range(struct panfrost_device *pfdev, struct panfrost_mmu *mmu, - u64 iova, size_t size) + u64 iova, u64 size) { if (mmu->as < 0) return; diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h index 1940ff86e49a..6c5a11ef1ee8 100644 --- a/drivers/gpu/drm/panfrost/panfrost_regs.h +++ b/drivers/gpu/drm/panfrost/panfrost_regs.h @@ -316,6 +316,8 @@ #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) +#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) + #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg) #define gpu_read(dev, reg) readl(dev->iomem + reg) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0473583dcdac..482fb0ae6cb5 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -119,7 +119,7 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) #endif if (pci_find_capability(pdev, PCI_CAP_ID_AGP)) - rdev->agp = radeon_agp_head_init(rdev->ddev); + rdev->agp = radeon_agp_head_init(dev); if (rdev->agp) { rdev->agp->agp_mtrr = arch_phys_wc_add( rdev->agp->agp_info.aper_base, diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 8ab3247dbc4a..13c6b857158f 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -1123,7 +1123,7 @@ static int cdn_dp_suspend(struct device *dev) return ret; } -static int cdn_dp_resume(struct device *dev) +static __maybe_unused int cdn_dp_resume(struct device *dev) { struct cdn_dp_device *dp = dev_get_drvdata(dev); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index ea4add2b9717..bb9e02c31946 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1160,9 +1160,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, } if (bo->deleted) { - ttm_bo_cleanup_refs(bo, false, false, locked); + ret = ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return 0; + return ret == -EBUSY ? -ENOSPC : ret; } ttm_bo_del_from_lru(bo); @@ -1216,7 +1216,7 @@ out: if (locked) dma_resv_unlock(bo->base.resv); ttm_bo_put(bo); - return ret; + return ret == -EBUSY ? -ENOSPC : ret; } void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 763fa6f4e07d..1c5ffe2935af 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -143,7 +143,6 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_resource *src_mem = bo->resource; struct ttm_resource_manager *src_man = ttm_manager_type(bdev, src_mem->mem_type); - struct ttm_resource src_copy = *src_mem; union { struct ttm_kmap_iter_tt tt; struct ttm_kmap_iter_linear_io io; @@ -173,11 +172,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, } ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - src_copy = *src_mem; - ttm_bo_move_sync_cleanup(bo, dst_mem); if (!src_iter->ops->maps_tt) - ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy); + ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); + ttm_bo_move_sync_cleanup(bo, dst_mem); + out_src_iter: if (!dst_iter->ops->maps_tt) ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem); diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index cb38b1a17b09..82cbb29a05aa 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -383,7 +383,8 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, else gfp_flags |= GFP_HIGHUSER; - for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages; + for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + num_pages; order = min_t(unsigned int, order, __fls(num_pages))) { bool apply_caching = false; struct ttm_pool_type *pt; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 24031a8acd2d..d5cd8b5dc0bf 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -32,7 +32,6 @@ #define pr_fmt(fmt) "[TTM] " fmt #include <linux/sched.h> -#include <linux/pagemap.h> #include <linux/shmem_fs.h> #include <linux/file.h> #include <drm/drm_cache.h> diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index b7dc32a0c9bb..b4b4653fe301 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -167,8 +167,6 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); bool connected = false; - WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); - if (vc4_hdmi->hpd_gpio && gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) { connected = true; @@ -189,12 +187,10 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) } } - pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_connected; } cec_phys_addr_invalidate(vc4_hdmi->cec_adap); - pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_disconnected; } @@ -436,7 +432,7 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_connector *connector = &vc4_hdmi->connector; struct drm_connector_state *cstate = connector->state; - struct drm_crtc *crtc = cstate->crtc; + struct drm_crtc *crtc = encoder->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; union hdmi_infoframe frame; int ret; @@ -541,11 +537,8 @@ static bool vc4_hdmi_supports_scrambling(struct drm_encoder *encoder, static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) { + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_connector_state *cstate = connector->state; - struct drm_crtc *crtc = cstate->crtc; - struct drm_display_mode *mode = &crtc->state->adjusted_mode; if (!vc4_hdmi_supports_scrambling(encoder, mode)) return; @@ -566,18 +559,17 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder) static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_connector_state *cstate = connector->state; + struct drm_crtc *crtc = encoder->crtc; /* - * At boot, connector->state will be NULL. Since we don't know the + * At boot, encoder->crtc will be NULL. Since we don't know the * state of the scrambler and in order to avoid any * inconsistency, let's disable it all the time. */ - if (cstate && !vc4_hdmi_supports_scrambling(encoder, &cstate->crtc->mode)) + if (crtc && !vc4_hdmi_supports_scrambling(encoder, &crtc->mode)) return; - if (cstate && !vc4_hdmi_mode_needs_scrambling(&cstate->crtc->mode)) + if (crtc && !vc4_hdmi_mode_needs_scrambling(&crtc->mode)) return; if (delayed_work_pending(&vc4_hdmi->scrambling_work)) @@ -635,6 +627,7 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder, vc4_hdmi->variant->phy_disable(vc4_hdmi); clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); ret = pm_runtime_put(&vc4_hdmi->pdev->dev); @@ -898,9 +891,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, vc4_hdmi_encoder_get_connector_state(encoder, state); struct vc4_hdmi_connector_state *vc4_conn_state = conn_state_to_vc4_hdmi_conn_state(conn_state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long bvb_rate, pixel_rate, hsm_rate; int ret; @@ -947,6 +938,13 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, return; } + ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + if (ret) { + DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->pixel_clock); + return; + } + vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (pixel_rate > 297000000) @@ -959,6 +957,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, ret = clk_set_min_rate(vc4_hdmi->pixel_bvb_clock, bvb_rate); if (ret) { DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -966,6 +965,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); if (ret) { DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); + clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -985,11 +985,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_connector_state *conn_state = - vc4_hdmi_encoder_get_connector_state(encoder, state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); @@ -1012,11 +1008,7 @@ static void vc4_hdmi_encoder_pre_crtc_enable(struct drm_encoder *encoder, static void vc4_hdmi_encoder_post_crtc_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { - struct drm_connector_state *conn_state = - vc4_hdmi_encoder_get_connector_state(encoder, state); - struct drm_crtc_state *crtc_state = - drm_atomic_get_new_crtc_state(state, conn_state->crtc); - struct drm_display_mode *mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; @@ -1204,8 +1196,8 @@ static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi, static void vc4_hdmi_set_n_cts(struct vc4_hdmi *vc4_hdmi, unsigned int samplerate) { - struct drm_connector *connector = &vc4_hdmi->connector; - struct drm_crtc *crtc = connector->state->crtc; + struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; + struct drm_crtc *crtc = encoder->crtc; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; u32 n, cts; u64 tmp; @@ -1238,13 +1230,13 @@ static inline struct vc4_hdmi *dai_to_hdmi(struct snd_soc_dai *dai) static int vc4_hdmi_audio_startup(struct device *dev, void *data) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_encoder *encoder = &vc4_hdmi->encoder.base.base; /* * If the HDMI encoder hasn't probed, or the encoder is * currently in DVI mode, treat the codec dai as missing. */ - if (!connector->state || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & + if (!encoder->crtc || !(HDMI_READ(HDMI_RAM_PACKET_CONFIG) & VC4_HDMI_RAM_PACKET_ENABLE)) return -ENODEV; @@ -1462,7 +1454,7 @@ static const struct hdmi_codec_ops vc4_hdmi_codec_ops = { .audio_startup = vc4_hdmi_audio_startup, }; -struct hdmi_codec_pdata vc4_hdmi_codec_pdata = { +static struct hdmi_codec_pdata vc4_hdmi_codec_pdata = { .ops = &vc4_hdmi_codec_ops, .max_i2s_channels = 8, .i2s = 1, @@ -2114,29 +2106,6 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; } -#ifdef CONFIG_PM -static int vc4_hdmi_runtime_suspend(struct device *dev) -{ - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - - clk_disable_unprepare(vc4_hdmi->hsm_clock); - - return 0; -} - -static int vc4_hdmi_runtime_resume(struct device *dev) -{ - struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); - int ret; - - ret = clk_prepare_enable(vc4_hdmi->hsm_clock); - if (ret) - return ret; - - return 0; -} -#endif - static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); @@ -2391,18 +2360,11 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} }; -static const struct dev_pm_ops vc4_hdmi_pm_ops = { - SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, - vc4_hdmi_runtime_resume, - NULL) -}; - struct platform_driver vc4_hdmi_driver = { .probe = vc4_hdmi_dev_probe, .remove = vc4_hdmi_dev_remove, .driver = { .name = "vc4_hdmi", .of_match_table = vc4_hdmi_dt_match, - .pm = &vc4_hdmi_pm_ops, }, }; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 2aee356840a2..314015d9e912 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -245,6 +245,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) mutex_unlock(&ring_info->ring_buffer_mutex); kfree(ring_info->pkt_buffer); + ring_info->pkt_buffer = NULL; ring_info->pkt_buffer_size = 0; } diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index f798922a4598..882c3c8ba399 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -28,10 +28,6 @@ MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -static bool use_ktime = true; -module_param(use_ktime, bool, 0400); -MODULE_PARM_DESC(use_ktime, "Use ktime for measuring I/O speed"); - /* * Option parsing. */ @@ -110,7 +106,6 @@ struct analog_port { char cooked; int bads; int reads; - int speed; int loop; int fuzz; int axes[4]; @@ -120,66 +115,6 @@ struct analog_port { }; /* - * Time macros. - */ - -#ifdef __i386__ - -#include <linux/i8253.h> - -#define GET_TIME(x) do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) -#define DELTA(x,y) (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) -#define TIME_NAME (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT") -static unsigned int get_time_pit(void) -{ - unsigned long flags; - unsigned int count; - - raw_spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x00, 0x43); - count = inb_p(0x40); - count |= inb_p(0x40) << 8; - raw_spin_unlock_irqrestore(&i8253_lock, flags); - - return count; -} -#elif defined(__x86_64__) -#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) -#define DELTA(x,y) ((y)-(x)) -#define TIME_NAME "TSC" -#elif defined(__alpha__) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_PPC) || defined(CONFIG_RISCV) -#define GET_TIME(x) do { x = get_cycles(); } while (0) -#define DELTA(x,y) ((y)-(x)) -#define TIME_NAME "get_cycles" -#else -#define FAKE_TIME -static unsigned long analog_faketime = 0; -#define GET_TIME(x) do { x = analog_faketime++; } while(0) -#define DELTA(x,y) ((y)-(x)) -#define TIME_NAME "Unreliable" -#warning Precise timer not defined for this architecture. -#endif - -static inline u64 get_time(void) -{ - if (use_ktime) { - return ktime_get_ns(); - } else { - unsigned int x; - GET_TIME(x); - return x; - } -} - -static inline unsigned int delta(u64 x, u64 y) -{ - if (use_ktime) - return y - x; - else - return DELTA((unsigned int)x, (unsigned int)y); -} - -/* * analog_decode() decodes analog joystick data and reports input events. */ @@ -234,18 +169,18 @@ static void analog_decode(struct analog *analog, int *axes, int *initial, int bu static int analog_cooked_read(struct analog_port *port) { struct gameport *gameport = port->gameport; - u64 time[4], start, loop, now; + ktime_t time[4], start, loop, now; unsigned int loopout, timeout; unsigned char data[4], this, last; unsigned long flags; int i, j; loopout = (ANALOG_LOOP_TIME * port->loop) / 1000; - timeout = ANALOG_MAX_TIME * port->speed; + timeout = ANALOG_MAX_TIME * NSEC_PER_MSEC; local_irq_save(flags); gameport_trigger(gameport); - now = get_time(); + now = ktime_get(); local_irq_restore(flags); start = now; @@ -258,16 +193,16 @@ static int analog_cooked_read(struct analog_port *port) local_irq_disable(); this = gameport_read(gameport) & port->mask; - now = get_time(); + now = ktime_get(); local_irq_restore(flags); - if ((last ^ this) && (delta(loop, now) < loopout)) { + if ((last ^ this) && (ktime_sub(now, loop) < loopout)) { data[i] = last ^ this; time[i] = now; i++; } - } while (this && (i < 4) && (delta(start, now) < timeout)); + } while (this && (i < 4) && (ktime_sub(now, start) < timeout)); this <<= 4; @@ -275,7 +210,7 @@ static int analog_cooked_read(struct analog_port *port) this |= data[i]; for (j = 0; j < 4; j++) if (data[i] & (1 << j)) - port->axes[j] = (delta(start, time[i]) << ANALOG_FUZZ_BITS) / port->loop; + port->axes[j] = ((u32)ktime_sub(time[i], start) << ANALOG_FUZZ_BITS) / port->loop; } return -(this != port->mask); @@ -375,38 +310,22 @@ static void analog_calibrate_timer(struct analog_port *port) { struct gameport *gameport = port->gameport; unsigned int i, t, tx; - u64 t1, t2, t3; + ktime_t t1, t2, t3; unsigned long flags; - if (use_ktime) { - port->speed = 1000000; - } else { - local_irq_save(flags); - t1 = get_time(); -#ifdef FAKE_TIME - analog_faketime += 830; -#endif - mdelay(1); - t2 = get_time(); - t3 = get_time(); - local_irq_restore(flags); - - port->speed = delta(t1, t2) - delta(t2, t3); - } - tx = ~0; for (i = 0; i < 50; i++) { local_irq_save(flags); - t1 = get_time(); + t1 = ktime_get(); for (t = 0; t < 50; t++) { gameport_read(gameport); - t2 = get_time(); + t2 = ktime_get(); } - t3 = get_time(); + t3 = ktime_get(); local_irq_restore(flags); udelay(i); - t = delta(t1, t2) - delta(t2, t3); + t = ktime_sub(t2, t1) - ktime_sub(t3, t2); if (t < tx) tx = t; } @@ -611,7 +530,7 @@ static int analog_init_port(struct gameport *gameport, struct gameport_driver *d t = gameport_read(gameport); msleep(ANALOG_MAX_TIME); port->mask = (gameport_read(gameport) ^ t) & t & 0xf; - port->fuzz = (port->speed * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS; + port->fuzz = (NSEC_PER_MSEC * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS; for (i = 0; i < ANALOG_INIT_RETRIES; i++) { if (!analog_cooked_read(port)) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 40a070a2e7f5..e75650e98c9e 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -210,7 +210,7 @@ config KEYBOARD_LKKBD select SERIO help Say Y here if you want to use a LK201 or LK401 style serial - keyboard. This keyboard is also useable on PCs if you attach + keyboard. This keyboard is also usable on PCs if you attach it with the inputattach program. The connector pinout is described within lkkbd.c. diff --git a/drivers/input/keyboard/adc-keys.c b/drivers/input/keyboard/adc-keys.c index 6d5be48d1b3d..bf72ab8df817 100644 --- a/drivers/input/keyboard/adc-keys.c +++ b/drivers/input/keyboard/adc-keys.c @@ -193,7 +193,7 @@ static const struct of_device_id adc_keys_of_match[] = { MODULE_DEVICE_TABLE(of, adc_keys_of_match); #endif -static struct platform_driver __refdata adc_keys_driver = { +static struct platform_driver adc_keys_driver = { .driver = { .name = "adc_keys", .of_match_table = of_match_ptr(adc_keys_of_match), diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index 90a59b973d00..1592da4de336 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -17,7 +17,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/i2c.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <linux/slab.h> #include <linux/platform_data/adp5588.h> diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 654e0476406b..bdd264459a97 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -18,7 +18,7 @@ #include <linux/platform_device.h> #include <linux/input.h> #include <linux/i2c.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> #include <linux/slab.h> #include <linux/input/adp5589.h> diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index c8194333d612..e0e931e796fa 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -157,7 +157,7 @@ static int ep93xx_keypad_open(struct input_dev *pdev) if (!keypad->enabled) { ep93xx_keypad_config(keypad); - clk_enable(keypad->clk); + clk_prepare_enable(keypad->clk); keypad->enabled = true; } @@ -169,7 +169,7 @@ static void ep93xx_keypad_close(struct input_dev *pdev) struct ep93xx_keypad *keypad = input_get_drvdata(pdev); if (keypad->enabled) { - clk_disable(keypad->clk); + clk_disable_unprepare(keypad->clk); keypad->enabled = false; } } diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 498cde376981..dd5227cf8696 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -309,18 +309,6 @@ config INPUT_GPIO_VIBRA To compile this driver as a module, choose M here: the module will be called gpio-vibra. -config INPUT_IXP4XX_BEEPER - tristate "IXP4XX Beeper support" - depends on ARCH_IXP4XX - help - If you say yes here, you can connect a beeper to the - ixp4xx gpio pins. This is used by the LinkSys NSLU2. - - If unsure, say Y. - - To compile this driver as a module, choose M here: the - module will be called ixp4xx-beeper. - config INPUT_COBALT_BTNS tristate "Cobalt button interface" depends on MIPS_COBALT @@ -811,16 +799,6 @@ config INPUT_XEN_KBDDEV_FRONTEND To compile this driver as a module, choose M here: the module will be called xen-kbdfront. -config INPUT_SIRFSOC_ONKEY - tristate "CSR SiRFSoC power on/off/suspend key support" - depends on ARCH_SIRF && OF - default y - help - Say Y here if you want to support for the SiRFSoC power on/off/suspend key - in Linux, after you press the onkey, system will suspend. - - If unsure, say N. - config INPUT_IDEAPAD_SLIDEBAR tristate "IdeaPad Laptop Slidebar" depends on INPUT diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index f593beed7e05..b92c53a6b5ae 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -44,7 +44,6 @@ obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o obj-$(CONFIG_INPUT_IQS269A) += iqs269a.o obj-$(CONFIG_INPUT_IQS626A) += iqs626a.o -obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o @@ -74,7 +73,6 @@ obj-$(CONFIG_INPUT_GPIO_ROTARY_ENCODER) += rotary_encoder.o obj-$(CONFIG_INPUT_RK805_PWRKEY) += rk805-pwrkey.o obj-$(CONFIG_INPUT_SC27XX_VIBRA) += sc27xx-vibra.o obj-$(CONFIG_INPUT_SGI_BTNS) += sgi_btns.o -obj-$(CONFIG_INPUT_SIRFSOC_ONKEY) += sirfsoc-onkey.o obj-$(CONFIG_INPUT_SOC_BUTTON_ARRAY) += soc_button_array.o obj-$(CONFIG_INPUT_SPARCSPKR) += sparcspkr.o obj-$(CONFIG_INPUT_STPMIC1_ONKEY) += stpmic1_onkey.o diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c deleted file mode 100644 index 05018d0c97c7..000000000000 --- a/drivers/input/misc/ixp4xx-beeper.c +++ /dev/null @@ -1,183 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Generic IXP4xx beeper driver - * - * Copyright (C) 2005 Tower Technologies - * - * based on nslu2-io.c - * Copyright (C) 2004 Karen Spearel - * - * Author: Alessandro Zummo <a.zummo@towertech.it> - * Maintainers: http://www.nslu2-linux.org/ - */ - -#include <linux/module.h> -#include <linux/input.h> -#include <linux/delay.h> -#include <linux/platform_device.h> -#include <linux/interrupt.h> -#include <linux/gpio.h> -#include <mach/hardware.h> - -MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>"); -MODULE_DESCRIPTION("ixp4xx beeper driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:ixp4xx-beeper"); - -static DEFINE_SPINLOCK(beep_lock); - -static int ixp4xx_timer2_irq; - -static void ixp4xx_spkr_control(unsigned int pin, unsigned int count) -{ - unsigned long flags; - - spin_lock_irqsave(&beep_lock, flags); - - if (count) { - gpio_direction_output(pin, 0); - *IXP4XX_OSRT2 = (count & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE; - } else { - gpio_direction_output(pin, 1); - gpio_direction_input(pin); - *IXP4XX_OSRT2 = 0; - } - - spin_unlock_irqrestore(&beep_lock, flags); -} - -static int ixp4xx_spkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) -{ - unsigned int pin = (unsigned int) input_get_drvdata(dev); - unsigned int count = 0; - - if (type != EV_SND) - return -1; - - switch (code) { - case SND_BELL: - if (value) - value = 1000; - case SND_TONE: - break; - default: - return -1; - } - - if (value > 20 && value < 32767) - count = (ixp4xx_timer_freq / (value * 4)) - 1; - - ixp4xx_spkr_control(pin, count); - - return 0; -} - -static irqreturn_t ixp4xx_spkr_interrupt(int irq, void *dev_id) -{ - unsigned int pin = (unsigned int) dev_id; - - /* clear interrupt */ - *IXP4XX_OSST = IXP4XX_OSST_TIMER_2_PEND; - - /* flip the beeper output */ - gpio_set_value(pin, !gpio_get_value(pin)); - - return IRQ_HANDLED; -} - -static int ixp4xx_spkr_probe(struct platform_device *dev) -{ - struct input_dev *input_dev; - int irq; - int err; - - input_dev = input_allocate_device(); - if (!input_dev) - return -ENOMEM; - - input_set_drvdata(input_dev, (void *) dev->id); - - input_dev->name = "ixp4xx beeper"; - input_dev->phys = "ixp4xx/gpio"; - input_dev->id.bustype = BUS_HOST; - input_dev->id.vendor = 0x001f; - input_dev->id.product = 0x0001; - input_dev->id.version = 0x0100; - input_dev->dev.parent = &dev->dev; - - input_dev->evbit[0] = BIT_MASK(EV_SND); - input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE); - input_dev->event = ixp4xx_spkr_event; - - irq = platform_get_irq(dev, 0); - if (irq < 0) { - err = irq; - goto err_free_device; - } - - err = gpio_request(dev->id, "ixp4-beeper"); - if (err) - goto err_free_device; - - err = request_irq(irq, &ixp4xx_spkr_interrupt, - IRQF_NO_SUSPEND, "ixp4xx-beeper", - (void *) dev->id); - if (err) - goto err_free_gpio; - ixp4xx_timer2_irq = irq; - - err = input_register_device(input_dev); - if (err) - goto err_free_irq; - - platform_set_drvdata(dev, input_dev); - - return 0; - - err_free_irq: - free_irq(irq, (void *)dev->id); - err_free_gpio: - gpio_free(dev->id); - err_free_device: - input_free_device(input_dev); - - return err; -} - -static int ixp4xx_spkr_remove(struct platform_device *dev) -{ - struct input_dev *input_dev = platform_get_drvdata(dev); - unsigned int pin = (unsigned int) input_get_drvdata(input_dev); - - input_unregister_device(input_dev); - - /* turn the speaker off */ - disable_irq(ixp4xx_timer2_irq); - ixp4xx_spkr_control(pin, 0); - - free_irq(ixp4xx_timer2_irq, (void *)dev->id); - gpio_free(dev->id); - - return 0; -} - -static void ixp4xx_spkr_shutdown(struct platform_device *dev) -{ - struct input_dev *input_dev = platform_get_drvdata(dev); - unsigned int pin = (unsigned int) input_get_drvdata(input_dev); - - /* turn off the speaker */ - disable_irq(ixp4xx_timer2_irq); - ixp4xx_spkr_control(pin, 0); -} - -static struct platform_driver ixp4xx_spkr_platform_driver = { - .driver = { - .name = "ixp4xx-beeper", - }, - .probe = ixp4xx_spkr_probe, - .remove = ixp4xx_spkr_remove, - .shutdown = ixp4xx_spkr_shutdown, -}; -module_platform_driver(ixp4xx_spkr_platform_driver); - diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c index 10e3fc0eac6e..33609603245d 100644 --- a/drivers/input/misc/pm8941-pwrkey.c +++ b/drivers/input/misc/pm8941-pwrkey.c @@ -284,7 +284,7 @@ static int pm8941_pwrkey_probe(struct platform_device *pdev) } if (pwrkey->data->supports_ps_hold_poff_config) { - pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify, + pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify; error = register_reboot_notifier(&pwrkey->reboot_notifier); if (error) { dev_err(&pdev->dev, "failed to register reboot notifier: %d\n", diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c deleted file mode 100644 index 7982bf8fb839..000000000000 --- a/drivers/input/misc/sirfsoc-onkey.c +++ /dev/null @@ -1,207 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Power key driver for SiRF PrimaII - * - * Copyright (c) 2013 - 2014 Cambridge Silicon Radio Limited, a CSR plc group - * company. - */ - -#include <linux/module.h> -#include <linux/interrupt.h> -#include <linux/delay.h> -#include <linux/platform_device.h> -#include <linux/input.h> -#include <linux/rtc/sirfsoc_rtciobrg.h> -#include <linux/of.h> -#include <linux/workqueue.h> - -struct sirfsoc_pwrc_drvdata { - u32 pwrc_base; - struct input_dev *input; - struct delayed_work work; -}; - -#define PWRC_ON_KEY_BIT (1 << 0) - -#define PWRC_INT_STATUS 0xc -#define PWRC_INT_MASK 0x10 -#define PWRC_PIN_STATUS 0x14 -#define PWRC_KEY_DETECT_UP_TIME 20 /* ms*/ - -static int sirfsoc_pwrc_is_on_key_down(struct sirfsoc_pwrc_drvdata *pwrcdrv) -{ - u32 state = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base + - PWRC_PIN_STATUS); - return !(state & PWRC_ON_KEY_BIT); /* ON_KEY is active low */ -} - -static void sirfsoc_pwrc_report_event(struct work_struct *work) -{ - struct sirfsoc_pwrc_drvdata *pwrcdrv = - container_of(work, struct sirfsoc_pwrc_drvdata, work.work); - - if (sirfsoc_pwrc_is_on_key_down(pwrcdrv)) { - schedule_delayed_work(&pwrcdrv->work, - msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME)); - } else { - input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 0); - input_sync(pwrcdrv->input); - } -} - -static irqreturn_t sirfsoc_pwrc_isr(int irq, void *dev_id) -{ - struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_id; - u32 int_status; - - int_status = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base + - PWRC_INT_STATUS); - sirfsoc_rtc_iobrg_writel(int_status & ~PWRC_ON_KEY_BIT, - pwrcdrv->pwrc_base + PWRC_INT_STATUS); - - input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 1); - input_sync(pwrcdrv->input); - schedule_delayed_work(&pwrcdrv->work, - msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME)); - - return IRQ_HANDLED; -} - -static void sirfsoc_pwrc_toggle_interrupts(struct sirfsoc_pwrc_drvdata *pwrcdrv, - bool enable) -{ - u32 int_mask; - - int_mask = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base + PWRC_INT_MASK); - if (enable) - int_mask |= PWRC_ON_KEY_BIT; - else - int_mask &= ~PWRC_ON_KEY_BIT; - sirfsoc_rtc_iobrg_writel(int_mask, pwrcdrv->pwrc_base + PWRC_INT_MASK); -} - -static int sirfsoc_pwrc_open(struct input_dev *input) -{ - struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input); - - sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true); - - return 0; -} - -static void sirfsoc_pwrc_close(struct input_dev *input) -{ - struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input); - - sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false); - cancel_delayed_work_sync(&pwrcdrv->work); -} - -static const struct of_device_id sirfsoc_pwrc_of_match[] = { - { .compatible = "sirf,prima2-pwrc" }, - {}, -}; -MODULE_DEVICE_TABLE(of, sirfsoc_pwrc_of_match); - -static int sirfsoc_pwrc_probe(struct platform_device *pdev) -{ - struct device_node *np = pdev->dev.of_node; - struct sirfsoc_pwrc_drvdata *pwrcdrv; - int irq; - int error; - - pwrcdrv = devm_kzalloc(&pdev->dev, sizeof(struct sirfsoc_pwrc_drvdata), - GFP_KERNEL); - if (!pwrcdrv) { - dev_info(&pdev->dev, "Not enough memory for the device data\n"); - return -ENOMEM; - } - - /* - * We can't use of_iomap because pwrc is not mapped in memory, - * the so-called base address is only offset in rtciobrg - */ - error = of_property_read_u32(np, "reg", &pwrcdrv->pwrc_base); - if (error) { - dev_err(&pdev->dev, - "unable to find base address of pwrc node in dtb\n"); - return error; - } - - pwrcdrv->input = devm_input_allocate_device(&pdev->dev); - if (!pwrcdrv->input) - return -ENOMEM; - - pwrcdrv->input->name = "sirfsoc pwrckey"; - pwrcdrv->input->phys = "pwrc/input0"; - pwrcdrv->input->evbit[0] = BIT_MASK(EV_KEY); - input_set_capability(pwrcdrv->input, EV_KEY, KEY_POWER); - - INIT_DELAYED_WORK(&pwrcdrv->work, sirfsoc_pwrc_report_event); - - pwrcdrv->input->open = sirfsoc_pwrc_open; - pwrcdrv->input->close = sirfsoc_pwrc_close; - - input_set_drvdata(pwrcdrv->input, pwrcdrv); - - /* Make sure the device is quiesced */ - sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false); - - irq = platform_get_irq(pdev, 0); - error = devm_request_irq(&pdev->dev, irq, - sirfsoc_pwrc_isr, 0, - "sirfsoc_pwrc_int", pwrcdrv); - if (error) { - dev_err(&pdev->dev, "unable to claim irq %d, error: %d\n", - irq, error); - return error; - } - - error = input_register_device(pwrcdrv->input); - if (error) { - dev_err(&pdev->dev, - "unable to register input device, error: %d\n", - error); - return error; - } - - dev_set_drvdata(&pdev->dev, pwrcdrv); - device_init_wakeup(&pdev->dev, 1); - - return 0; -} - -static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev) -{ - struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev); - struct input_dev *input = pwrcdrv->input; - - /* - * Do not mask pwrc interrupt as we want pwrc work as a wakeup source - * if users touch X_ONKEY_B, see arch/arm/mach-prima2/pm.c - */ - mutex_lock(&input->mutex); - if (input_device_enabled(input)) - sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true); - mutex_unlock(&input->mutex); - - return 0; -} - -static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume); - -static struct platform_driver sirfsoc_pwrc_driver = { - .probe = sirfsoc_pwrc_probe, - .driver = { - .name = "sirfsoc-pwrc", - .pm = &sirfsoc_pwrc_pm_ops, - .of_match_table = sirfsoc_pwrc_of_match, - } -}; - -module_platform_driver(sirfsoc_pwrc_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Binghua Duan <Binghua.Duan@csr.com>, Xianglong Du <Xianglong.Du@csr.com>"); -MODULE_DESCRIPTION("CSR Prima2 PWRC Driver"); -MODULE_ALIAS("platform:sirfsoc-pwrc"); diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h index dc4a240f4489..3c84deefa327 100644 --- a/drivers/input/mouse/elan_i2c.h +++ b/drivers/input/mouse/elan_i2c.h @@ -55,8 +55,9 @@ #define ETP_FW_PAGE_SIZE_512 512 #define ETP_FW_SIGNATURE_SIZE 6 -#define ETP_PRODUCT_ID_DELBIN 0x00C2 +#define ETP_PRODUCT_ID_WHITEBOX 0x00B8 #define ETP_PRODUCT_ID_VOXEL 0x00BF +#define ETP_PRODUCT_ID_DELBIN 0x00C2 #define ETP_PRODUCT_ID_MAGPIE 0x0120 #define ETP_PRODUCT_ID_BOBBA 0x0121 diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index dad22c1ea6a0..47af62c12267 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -105,6 +105,7 @@ static u32 elan_i2c_lookup_quirks(u16 ic_type, u16 product_id) u32 quirks; } elan_i2c_quirks[] = { { 0x0D, ETP_PRODUCT_ID_DELBIN, ETP_QUIRK_QUICK_WAKEUP }, + { 0x0D, ETP_PRODUCT_ID_WHITEBOX, ETP_QUIRK_QUICK_WAKEUP }, { 0x10, ETP_PRODUCT_ID_VOXEL, ETP_QUIRK_QUICK_WAKEUP }, { 0x14, ETP_PRODUCT_ID_MAGPIE, ETP_QUIRK_QUICK_WAKEUP }, { 0x14, ETP_PRODUCT_ID_BOBBA, ETP_QUIRK_QUICK_WAKEUP }, diff --git a/drivers/input/serio/parkbd.c b/drivers/input/serio/parkbd.c index 3ac57a91ede4..51b68501896c 100644 --- a/drivers/input/serio/parkbd.c +++ b/drivers/input/serio/parkbd.c @@ -220,16 +220,4 @@ static struct parport_driver parkbd_parport_driver = { .detach = parkbd_detach, .devmodel = true, }; - -static int __init parkbd_init(void) -{ - return parport_register_driver(&parkbd_parport_driver); -} - -static void __exit parkbd_exit(void) -{ - parport_unregister_driver(&parkbd_parport_driver); -} - -module_init(parkbd_init); -module_exit(parkbd_exit); +module_parport_driver(parkbd_parport_driver); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index ad454cd2855a..d4e74738c5a8 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -932,7 +932,7 @@ config TOUCHSCREEN_USB_COMPOSITE - JASTEC USB Touch Controller/DigiTech DTR-02U - Zytronic controllers - Elo TouchSystems 2700 IntelliTouch - - EasyTouch USB Touch Controller from Data Modul + - EasyTouch USB Touch Controller from Data Module - e2i (Mimo monitors) Have a look at <http://linux.chapter7.ch/touchkit/> for diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 263de3bfb6cd..bb2e1cbffba7 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -899,6 +899,7 @@ static int edt_ft5x06_ts_identify(struct i2c_client *client, * the identification registers. */ switch (rdbuf[0]) { + case 0x11: /* EDT EP0110M09 */ case 0x35: /* EDT EP0350M09 */ case 0x43: /* EDT EP0430M09 */ case 0x50: /* EDT EP0500M09 */ diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 0efd1a1bb192..9fa3b0e421be 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -54,6 +54,7 @@ enum mms_type { TYPE_MMS114 = 114, + TYPE_MMS134S = 134, TYPE_MMS136 = 136, TYPE_MMS152 = 152, TYPE_MMS345L = 345, @@ -212,7 +213,7 @@ static irqreturn_t mms114_interrupt(int irq, void *dev_id) goto out; /* MMS136 has slightly different event size */ - if (data->type == TYPE_MMS136) + if (data->type == TYPE_MMS134S || data->type == TYPE_MMS136) touch_size = packet_size / MMS136_EVENT_SIZE; else touch_size = packet_size / MMS114_EVENT_SIZE; @@ -281,6 +282,7 @@ static int mms114_get_version(struct mms114_data *data) break; case TYPE_MMS114: + case TYPE_MMS134S: case TYPE_MMS136: error = __mms114_read_reg(data, MMS114_TSP_REV, 6, buf); if (error) @@ -304,8 +306,9 @@ static int mms114_setup_regs(struct mms114_data *data) if (error < 0) return error; - /* Only MMS114 and MMS136 have configuration and power on registers */ - if (data->type != TYPE_MMS114 && data->type != TYPE_MMS136) + /* MMS114, MMS134S and MMS136 have configuration and power on registers */ + if (data->type != TYPE_MMS114 && data->type != TYPE_MMS134S && + data->type != TYPE_MMS136) return 0; error = mms114_set_active(data, true); @@ -487,7 +490,8 @@ static int mms114_probe(struct i2c_client *client, 0, data->props.max_y, 0, 0); } - if (data->type == TYPE_MMS114 || data->type == TYPE_MMS136) { + if (data->type == TYPE_MMS114 || data->type == TYPE_MMS134S || + data->type == TYPE_MMS136) { /* * The firmware handles movement and pressure fuzz, so * don't duplicate that in software. @@ -612,6 +616,9 @@ static const struct of_device_id mms114_dt_match[] = { .compatible = "melfas,mms114", .data = (void *)TYPE_MMS114, }, { + .compatible = "melfas,mms134s", + .data = (void *)TYPE_MMS134S, + }, { .compatible = "melfas,mms136", .data = (void *)TYPE_MMS136, }, { diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8ad8618b3530..124c41adeca1 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -82,7 +82,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU + default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index bdcf167b4afe..2a822b229bd0 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -297,6 +297,22 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +#ifdef CONFIG_IRQ_REMAP +static bool check_feature_on_all_iommus(u64 mask) +{ + bool ret = false; + struct amd_iommu *iommu; + + for_each_iommu(iommu) { + ret = iommu_feature(iommu, mask); + if (!ret) + return false; + } + + return true; +} +#endif + /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -813,9 +829,9 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) return 0; } -#ifdef CONFIG_IRQ_REMAP static int iommu_init_ga_log(struct amd_iommu *iommu) { +#ifdef CONFIG_IRQ_REMAP u64 entry; if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) @@ -845,25 +861,9 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) err_out: free_ga_log(iommu); return -EINVAL; -} -#endif /* CONFIG_IRQ_REMAP */ - -static int iommu_init_ga(struct amd_iommu *iommu) -{ - int ret = 0; - -#ifdef CONFIG_IRQ_REMAP - /* Note: We have already checked GASup from IVRS table. - * Now, we need to make sure that GAMSup is set. - */ - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !iommu_feature(iommu, FEATURE_GAM_VAPIC)) - amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; - - ret = iommu_init_ga_log(iommu); +#else + return 0; #endif /* CONFIG_IRQ_REMAP */ - - return ret; } static int __init alloc_cwwb_sem(struct amd_iommu *iommu) @@ -1845,7 +1845,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; - ret = iommu_init_ga(iommu); + ret = iommu_init_ga_log(iommu); if (ret) return ret; @@ -2479,6 +2479,14 @@ static void early_enable_iommus(void) } #ifdef CONFIG_IRQ_REMAP + /* + * Note: We have already checked GASup from IVRS table. + * Now, we need to make sure that GAMSup is set. + */ + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) + amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP); #endif diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 2014fe8695ac..0c228787704f 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -514,9 +514,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) { mutex_lock(&mm->context.lock); - /* Synchronize with READ_ONCE in update_pasid(). */ - smp_store_release(&mm->pasid, pasid); - /* Update PASID MSR on all CPUs running the mm's tasks. */ on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true); @@ -792,7 +789,19 @@ prq_retry: goto prq_retry; } + /* + * A work in IO page fault workqueue may try to lock pasid_mutex now. + * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for + * all works in the workqueue to finish may cause deadlock. + * + * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev(). + * Unlock it to allow the works to be handled while waiting for + * them to finish. + */ + lockdep_assert_held(&pasid_mutex); + mutex_unlock(&pasid_mutex); iopf_queue_flush_dev(dev); + mutex_lock(&pasid_mutex); /* * Perform steps described in VT-d spec CH7.10 to drain page diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 0af42fb93a49..9e8bc802ac05 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -519,6 +519,7 @@ retry: return new_iova->pfn_lo; } +EXPORT_SYMBOL_GPL(alloc_iova_fast); /** * free_iova_fast - free iova pfn range into rcache @@ -536,6 +537,7 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) free_iova(iovad, pfn); } +EXPORT_SYMBOL_GPL(free_iova_fast); #define fq_ring_for_each(i, fq) \ for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 94fb63a7b357..fe63d5ee201b 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -570,7 +570,7 @@ fail_msg_node: fail_db_node: of_node_put(smu->db_node); fail_bootmem: - memblock_free(__pa(smu), sizeof(struct smu_device)); + memblock_free_ptr(smu, sizeof(struct smu_device)); smu = NULL; fail_np: of_node_put(np); diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile index 5d8b48288cf4..6ebe3c7001ff 100644 --- a/drivers/misc/habanalabs/common/Makefile +++ b/drivers/misc/habanalabs/common/Makefile @@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/asid.o common/habanalabs_ioctl.o \ common/command_buffer.o common/hw_queue.o common/irq.o \ common/sysfs.o common/hwmon.o common/memory.o \ - common/command_submission.o common/firmware_if.o + common/command_submission.o common/firmware_if.o \ + common/state_dump.o diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 719168c980a4..8132a84698d5 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); - if (rc < 0) - rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL); spin_unlock(&mgr->cb_lock); if (rc < 0) { @@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_private_data = cb; - rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, + rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address, cb->bus_address, cb->size); if (rc) { spin_lock(&cb->lock); diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 80c60fb41bbc..7b0516cf808b 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -38,7 +38,11 @@ static void hl_sob_reset(struct kref *ref) kref); struct hl_device *hdev = hw_sob->hdev; + dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); + hdev->asic_funcs->reset_sob(hdev, hw_sob); + + hw_sob->need_reset = false; } void hl_sob_reset_error(struct kref *ref) @@ -52,6 +56,24 @@ void hl_sob_reset_error(struct kref *ref) hw_sob->q_idx, hw_sob->sob_id); } +void hw_sob_put(struct hl_hw_sob *hw_sob) +{ + if (hw_sob) + kref_put(&hw_sob->kref, hl_sob_reset); +} + +static void hw_sob_put_err(struct hl_hw_sob *hw_sob) +{ + if (hw_sob) + kref_put(&hw_sob->kref, hl_sob_reset_error); +} + +void hw_sob_get(struct hl_hw_sob *hw_sob) +{ + if (hw_sob) + kref_get(&hw_sob->kref); +} + /** * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet * @sob_base: sob base id @@ -84,76 +106,29 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) return 0; } -static void sob_reset_work(struct work_struct *work) -{ - struct hl_cs_compl *hl_cs_cmpl = - container_of(work, struct hl_cs_compl, sob_reset_work); - struct hl_device *hdev = hl_cs_cmpl->hdev; - - /* - * A signal CS can get completion while the corresponding wait - * for signal CS is on its way to the PQ. The wait for signal CS - * will get stuck if the signal CS incremented the SOB to its - * max value and there are no pending (submitted) waits on this - * SOB. - * We do the following to void this situation: - * 1. The wait for signal CS must get a ref for the signal CS as - * soon as possible in cs_ioctl_signal_wait() and put it - * before being submitted to the PQ but after it incremented - * the SOB refcnt in init_signal_wait_cs(). - * 2. Signal/Wait for signal CS will decrement the SOB refcnt - * here. - * These two measures guarantee that the wait for signal CS will - * reset the SOB upon completion rather than the signal CS and - * hence the above scenario is avoided. - */ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); - - if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->reset_sob_group(hdev, - hl_cs_cmpl->sob_group); - - kfree(hl_cs_cmpl); -} - static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = container_of(kref, struct hl_fence, refcount); struct hl_cs_compl *hl_cs_cmpl = container_of(fence, struct hl_cs_compl, base_fence); - struct hl_device *hdev = hl_cs_cmpl->hdev; - /* EBUSY means the CS was never submitted and hence we don't have - * an attached hw_sob object that we should handle here - */ - if (fence->error == -EBUSY) - goto free; - - if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || - (hl_cs_cmpl->type == CS_TYPE_WAIT) || - (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) { - - dev_dbg(hdev->dev, - "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", - hl_cs_cmpl->cs_seq, - hl_cs_cmpl->type, - hl_cs_cmpl->hw_sob->sob_id, - hl_cs_cmpl->sob_val); - - queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work); - - return; - } - -free: kfree(hl_cs_cmpl); } void hl_fence_put(struct hl_fence *fence) { - if (fence) - kref_put(&fence->refcount, hl_fence_release); + if (IS_ERR_OR_NULL(fence)) + return; + kref_put(&fence->refcount, hl_fence_release); +} + +void hl_fences_put(struct hl_fence **fence, int len) +{ + int i; + + for (i = 0; i < len; i++, fence++) + hl_fence_put(*fence); } void hl_fence_get(struct hl_fence *fence) @@ -473,11 +448,139 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) spin_unlock(&hdev->cs_mirror_lock); } +/* + * force_complete_multi_cs - complete all contexts that wait on multi-CS + * + * @hdev: pointer to habanalabs device structure + */ +static void force_complete_multi_cs(struct hl_device *hdev) +{ + int i; + + for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { + struct multi_cs_completion *mcs_compl; + + mcs_compl = &hdev->multi_cs_completion[i]; + + spin_lock(&mcs_compl->lock); + + if (!mcs_compl->used) { + spin_unlock(&mcs_compl->lock); + continue; + } + + /* when calling force complete no context should be waiting on + * multi-cS. + * We are calling the function as a protection for such case + * to free any pending context and print error message + */ + dev_err(hdev->dev, + "multi-CS completion context %d still waiting when calling force completion\n", + i); + complete_all(&mcs_compl->completion); + spin_unlock(&mcs_compl->lock); + } +} + +/* + * complete_multi_cs - complete all waiting entities on multi-CS + * + * @hdev: pointer to habanalabs device structure + * @cs: CS structure + * The function signals a waiting entity that has an overlapping stream masters + * with the completed CS. + * For example: + * - a completed CS worked on stream master QID 4, multi CS completion + * is actively waiting on stream master QIDs 3, 5. don't send signal as no + * common stream master QID + * - a completed CS worked on stream master QID 4, multi CS completion + * is actively waiting on stream master QIDs 3, 4. send signal as stream + * master QID 4 is common + */ +static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_fence *fence = cs->fence; + int i; + + /* in case of multi CS check for completion only for the first CS */ + if (cs->staged_cs && !cs->staged_first) + return; + + for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { + struct multi_cs_completion *mcs_compl; + + mcs_compl = &hdev->multi_cs_completion[i]; + if (!mcs_compl->used) + continue; + + spin_lock(&mcs_compl->lock); + + /* + * complete if: + * 1. still waiting for completion + * 2. the completed CS has at least one overlapping stream + * master with the stream masters in the completion + */ + if (mcs_compl->used && + (fence->stream_master_qid_map & + mcs_compl->stream_master_qid_map)) { + /* extract the timestamp only of first completed CS */ + if (!mcs_compl->timestamp) + mcs_compl->timestamp = + ktime_to_ns(fence->timestamp); + complete_all(&mcs_compl->completion); + } + + spin_unlock(&mcs_compl->lock); + } +} + +static inline void cs_release_sob_reset_handler(struct hl_device *hdev, + struct hl_cs *cs, + struct hl_cs_compl *hl_cs_cmpl) +{ + /* Skip this handler if the cs wasn't submitted, to avoid putting + * the hw_sob twice, since this case already handled at this point, + * also skip if the hw_sob pointer wasn't set. + */ + if (!hl_cs_cmpl->hw_sob || !cs->submitted) + return; + + spin_lock(&hl_cs_cmpl->lock); + + /* + * we get refcount upon reservation of signals or signal/wait cs for the + * hw_sob object, and need to put it when the first staged cs + * (which cotains the encaps signals) or cs signal/wait is completed. + */ + if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || + (hl_cs_cmpl->type == CS_TYPE_WAIT) || + (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || + (!!hl_cs_cmpl->encaps_signals)) { + dev_dbg(hdev->dev, + "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", + hl_cs_cmpl->cs_seq, + hl_cs_cmpl->type, + hl_cs_cmpl->hw_sob->sob_id, + hl_cs_cmpl->sob_val); + + hw_sob_put(hl_cs_cmpl->hw_sob); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); + } + + spin_unlock(&hl_cs_cmpl->lock); +} + static void cs_do_release(struct kref *ref) { struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); struct hl_device *hdev = cs->ctx->hdev; struct hl_cs_job *job, *tmp; + struct hl_cs_compl *hl_cs_cmpl = + container_of(cs->fence, struct hl_cs_compl, base_fence); cs->completed = true; @@ -493,8 +596,9 @@ static void cs_do_release(struct kref *ref) complete_job(hdev, job); if (!cs->submitted) { - /* In case the wait for signal CS was submitted, the put occurs - * in init_signal_wait_cs() or collective_wait_init_cs() + /* + * In case the wait for signal CS was submitted, the fence put + * occurs in init_signal_wait_cs() or collective_wait_init_cs() * right before hanging on the PQ. */ if (cs->type == CS_TYPE_WAIT || @@ -535,8 +639,20 @@ static void cs_do_release(struct kref *ref) list_del(&cs->staged_cs_node); spin_unlock(&hdev->cs_mirror_lock); } + + /* decrement refcount to handle when first staged cs + * with encaps signals is completed. + */ + if (hl_cs_cmpl->encaps_signals) + kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, + hl_encaps_handle_do_release); } + if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) + && cs->encaps_signals) + kref_put(&cs->encaps_sig_hdl->refcount, + hl_encaps_handle_do_release); + out: /* Must be called before hl_ctx_put because inside we use ctx to get * the device @@ -566,6 +682,10 @@ out: if (cs->timestamp) cs->fence->timestamp = ktime_get(); complete_all(&cs->fence->completion); + complete_multi_cs(hdev, cs); + + cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); + hl_fence_put(cs->fence); kfree(cs->jobs_in_queue_cnt); @@ -621,6 +741,10 @@ static void cs_timedout(struct work_struct *work) break; } + rc = hl_state_dump(hdev); + if (rc) + dev_err(hdev->dev, "Error during system state dump %d\n", rc); + cs_put(cs); if (likely(!skip_reset_on_timeout)) { @@ -661,6 +785,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->completed = false; cs->type = cs_type; cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); + cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); cs->timeout_jiffies = timeout; cs->skip_reset_on_timeout = hdev->skip_reset_on_timeout || @@ -671,9 +796,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, kref_init(&cs->refcount); spin_lock_init(&cs->job_lock); - cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); if (!cs_cmpl) - cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL); + cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); @@ -698,7 +823,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs_cmpl->hdev = hdev; cs_cmpl->type = cs->type; spin_lock_init(&cs_cmpl->lock); - INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work); cs->fence = &cs_cmpl->base_fence; spin_lock(&ctx->cs_lock); @@ -791,31 +915,22 @@ void hl_cs_rollback_all(struct hl_device *hdev) cs_rollback(hdev, cs); cs_put(cs); } -} - -void hl_pending_cb_list_flush(struct hl_ctx *ctx) -{ - struct hl_pending_cb *pending_cb, *tmp; - list_for_each_entry_safe(pending_cb, tmp, - &ctx->pending_cb_list, cb_node) { - list_del(&pending_cb->cb_node); - hl_cb_put(pending_cb->cb); - kfree(pending_cb); - } + force_complete_multi_cs(hdev); } static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) { struct hl_user_pending_interrupt *pend; + unsigned long flags; - spin_lock(&interrupt->wait_list_lock); + spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { pend->fence.error = -EIO; complete_all(&pend->fence.completion); } - spin_unlock(&interrupt->wait_list_lock); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); } void hl_release_pending_user_interrupts(struct hl_device *hdev) @@ -981,6 +1096,10 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_TYPE_WAIT; else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) return CS_TYPE_COLLECTIVE_WAIT; + else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY) + return CS_RESERVE_SIGNALS; + else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) + return CS_UNRESERVE_SIGNALS; else return CS_TYPE_DEFAULT; } @@ -1081,7 +1200,8 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, } static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, - u64 sequence, u32 flags) + u64 sequence, u32 flags, + u32 encaps_signal_handle) { if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION)) return 0; @@ -1093,6 +1213,9 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, /* Staged CS sequence is the first CS sequence */ INIT_LIST_HEAD(&cs->staged_cs_node); cs->staged_sequence = cs->sequence; + + if (cs->encaps_signals) + cs->encaps_sig_hdl_id = encaps_signal_handle; } else { /* User sequence will be validated in 'hl_hw_queue_schedule_cs' * under the cs_mirror_lock @@ -1108,9 +1231,20 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, return 0; } +static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) +{ + int i; + + for (i = 0; i < hdev->stream_master_qid_arr_size; i++) + if (qid == hdev->stream_master_qid_arr[i]) + return BIT(i); + + return 0; +} + static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, u32 num_chunks, u64 *cs_seq, u32 flags, - u32 timeout) + u32 encaps_signals_handle, u32 timeout) { bool staged_mid, int_queues_only = true; struct hl_device *hdev = hpriv->hdev; @@ -1121,6 +1255,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, struct hl_cs *cs; struct hl_cb *cb; u64 user_sequence; + u8 stream_master_qid_map = 0; int rc, i; cntr = &hdev->aggregated_cs_counters; @@ -1148,7 +1283,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, hl_debugfs_add_cs(cs); - rc = cs_staged_submission(hdev, cs, user_sequence, flags); + rc = cs_staged_submission(hdev, cs, user_sequence, flags, + encaps_signals_handle); if (rc) goto free_cs_object; @@ -1179,9 +1315,20 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; } - if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) + if (queue_type == QUEUE_TYPE_EXT || + queue_type == QUEUE_TYPE_HW) { int_queues_only = false; + /* + * store which stream are being used for external/HW + * queues of this CS + */ + if (hdev->supports_wait_for_multi_cs) + stream_master_qid_map |= + get_stream_master_qid_mask(hdev, + chunk->queue_index); + } + job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { @@ -1242,6 +1389,13 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, goto free_cs_object; } + /* + * store the (external/HW queues) streams used by the CS in the + * fence object for multi-CS completion + */ + if (hdev->supports_wait_for_multi_cs) + cs->fence->stream_master_qid_map = stream_master_qid_map; + rc = hl_hw_queue_schedule_cs(cs); if (rc) { if (rc != -EAGAIN) @@ -1270,130 +1424,6 @@ out: return rc; } -static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id) -{ - struct hw_queue_properties *hw_queue_prop; - struct hl_cs_counters_atomic *cntr; - struct hl_cs_job *job; - - hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id]; - cntr = &hdev->aggregated_cs_counters; - - job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); - if (!job) { - atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); - atomic64_inc(&cntr->out_of_mem_drop_cnt); - dev_err(hdev->dev, "Failed to allocate a new job\n"); - return -ENOMEM; - } - - job->id = 0; - job->cs = cs; - job->user_cb = cb; - atomic_inc(&job->user_cb->cs_cnt); - job->user_cb_size = size; - job->hw_queue_id = hw_queue_id; - job->patched_cb = job->user_cb; - job->job_cb_size = job->user_cb_size; - - /* increment refcount as for external queues we get completion */ - cs_get(cs); - - cs->jobs_in_queue_cnt[job->hw_queue_id]++; - - list_add_tail(&job->cs_node, &cs->job_list); - - hl_debugfs_add_job(hdev, job); - - return 0; -} - -static int hl_submit_pending_cb(struct hl_fpriv *hpriv) -{ - struct hl_device *hdev = hpriv->hdev; - struct hl_ctx *ctx = hpriv->ctx; - struct hl_pending_cb *pending_cb, *tmp; - struct list_head local_cb_list; - struct hl_cs *cs; - struct hl_cb *cb; - u32 hw_queue_id; - u32 cb_size; - int process_list, rc = 0; - - if (list_empty(&ctx->pending_cb_list)) - return 0; - - process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0); - - /* Only a single thread is allowed to process the list */ - if (!process_list) - return 0; - - if (list_empty(&ctx->pending_cb_list)) - goto free_pending_cb_token; - - /* move all list elements to a local list */ - INIT_LIST_HEAD(&local_cb_list); - spin_lock(&ctx->pending_cb_lock); - list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list, - cb_node) - list_move_tail(&pending_cb->cb_node, &local_cb_list); - spin_unlock(&ctx->pending_cb_lock); - - rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0, - hdev->timeout_jiffies); - if (rc) - goto add_list_elements; - - hl_debugfs_add_cs(cs); - - /* Iterate through pending cb list, create jobs and add to CS */ - list_for_each_entry(pending_cb, &local_cb_list, cb_node) { - cb = pending_cb->cb; - cb_size = pending_cb->cb_size; - hw_queue_id = pending_cb->hw_queue_id; - - rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size, - hw_queue_id); - if (rc) - goto free_cs_object; - } - - rc = hl_hw_queue_schedule_cs(cs); - if (rc) { - if (rc != -EAGAIN) - dev_err(hdev->dev, - "Failed to submit CS %d.%llu (%d)\n", - ctx->asid, cs->sequence, rc); - goto free_cs_object; - } - - /* pending cb was scheduled successfully */ - list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) { - list_del(&pending_cb->cb_node); - kfree(pending_cb); - } - - cs_put(cs); - - goto free_pending_cb_token; - -free_cs_object: - cs_rollback(hdev, cs); - cs_put(cs); -add_list_elements: - spin_lock(&ctx->pending_cb_lock); - list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list, - cb_node) - list_move(&pending_cb->cb_node, &ctx->pending_cb_list); - spin_unlock(&ctx->pending_cb_lock); -free_pending_cb_token: - atomic_set(&ctx->thread_pending_cb_token, 1); - - return rc; -} - static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, u64 *cs_seq) { @@ -1443,7 +1473,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, rc = 0; } else { rc = cs_ioctl_default(hpriv, chunks, num_chunks, - cs_seq, 0, hdev->timeout_jiffies); + cs_seq, 0, 0, hdev->timeout_jiffies); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -1501,10 +1531,17 @@ out: * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case. * if the SOB value reaches the max value move to the other SOB reserved * to the queue. + * @hdev: pointer to device structure + * @q_idx: stream queue index + * @hw_sob: the H/W SOB used in this signal CS. + * @count: signals count + * @encaps_sig: tells whether it's reservation for encaps signals or not. + * * Note that this function must be called while hw_queues_lock is taken. */ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, - struct hl_hw_sob **hw_sob, u32 count) + struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig) + { struct hl_sync_stream_properties *prop; struct hl_hw_sob *sob = *hw_sob, *other_sob; @@ -1512,7 +1549,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, prop = &hdev->kernel_queues[q_idx].sync_stream_prop; - kref_get(&sob->kref); + hw_sob_get(sob); /* check for wraparound */ if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { @@ -1522,7 +1559,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, * just incremented the refcount right before calling this * function. */ - kref_put(&sob->kref, hl_sob_reset_error); + hw_sob_put_err(sob); /* * check the other sob value, if it still in use then fail @@ -1537,12 +1574,42 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, return -EINVAL; } - prop->next_sob_val = 1; + /* + * next_sob_val always points to the next available signal + * in the sob, so in encaps signals it will be the next one + * after reserving the required amount. + */ + if (encaps_sig) + prop->next_sob_val = count + 1; + else + prop->next_sob_val = count; /* only two SOBs are currently in use */ prop->curr_sob_offset = other_sob_offset; *hw_sob = other_sob; + /* + * check if other_sob needs reset, then do it before using it + * for the reservation or the next signal cs. + * we do it here, and for both encaps and regular signal cs + * cases in order to avoid possible races of two kref_put + * of the sob which can occur at the same time if we move the + * sob reset(kref_put) to cs_do_release function. + * in addition, if we have combination of cs signal and + * encaps, and at the point we need to reset the sob there was + * no more reservations and only signal cs keep coming, + * in such case we need signal_cs to put the refcount and + * reset the sob. + */ + if (other_sob->need_reset) + hw_sob_put(other_sob); + + if (encaps_sig) { + /* set reset indication for the sob */ + sob->need_reset = true; + hw_sob_get(other_sob); + } + dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", prop->curr_sob_offset, q_idx); } else { @@ -1553,12 +1620,18 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, } static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, - struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx) + struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx, + bool encaps_signals) { u64 *signal_seq_arr = NULL; u32 size_to_copy, signal_seq_arr_len; int rc = 0; + if (encaps_signals) { + *signal_seq = chunk->encaps_signal_seq; + return 0; + } + signal_seq_arr_len = chunk->num_signal_seq_arr; /* currently only one signal seq is supported */ @@ -1583,7 +1656,7 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, return -ENOMEM; } - size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr); + size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr); if (copy_from_user(signal_seq_arr, u64_to_user_ptr(chunk->signal_seq_arr), size_to_copy)) { @@ -1605,8 +1678,8 @@ out: } static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, - struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type, - u32 q_idx) + struct hl_ctx *ctx, struct hl_cs *cs, + enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset) { struct hl_cs_counters_atomic *cntr; struct hl_cs_job *job; @@ -1644,6 +1717,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, job->user_cb_size = cb_size; job->hw_queue_id = q_idx; + if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) + && cs->encaps_signals) + job->encaps_sig_wait_offset = encaps_signal_offset; /* * No need in parsing, user CB is the patched CB. * We call hl_cb_destroy() out of two reasons - we don't need the CB in @@ -1666,11 +1742,196 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, return 0; } +static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, + u32 q_idx, u32 count, + u32 *handle_id, u32 *sob_addr, + u32 *signals_count) +{ + struct hw_queue_properties *hw_queue_prop; + struct hl_sync_stream_properties *prop; + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_encaps_sig_handle *handle; + struct hl_encaps_signals_mgr *mgr; + struct hl_hw_sob *hw_sob; + int hdl_id; + int rc = 0; + + if (count >= HL_MAX_SOB_VAL) { + dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", + count); + rc = -EINVAL; + goto out; + } + + if (q_idx >= hdev->asic_prop.max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + q_idx); + rc = -EINVAL; + goto out; + } + + hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; + + if (!hw_queue_prop->supports_sync_stream) { + dev_err(hdev->dev, + "Queue index %d does not support sync stream operations\n", + q_idx); + rc = -EINVAL; + goto out; + } + + prop = &hdev->kernel_queues[q_idx].sync_stream_prop; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + rc = -ENOMEM; + goto out; + } + + handle->count = count; + mgr = &hpriv->ctx->sig_mgr; + + spin_lock(&mgr->lock); + hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); + spin_unlock(&mgr->lock); + + if (hdl_id < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); + rc = -EINVAL; + goto out; + } + + handle->id = hdl_id; + handle->q_idx = q_idx; + handle->hdev = hdev; + kref_init(&handle->refcount); + + hdev->asic_funcs->hw_queues_lock(hdev); + + hw_sob = &prop->hw_sob[prop->curr_sob_offset]; + + /* + * Increment the SOB value by count by user request + * to reserve those signals + * check if the signals amount to reserve is not exceeding the max sob + * value, if yes then switch sob. + */ + rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count, + true); + if (rc) { + dev_err(hdev->dev, "Failed to switch SOB\n"); + hdev->asic_funcs->hw_queues_unlock(hdev); + rc = -EINVAL; + goto remove_idr; + } + /* set the hw_sob to the handle after calling the sob wraparound handler + * since sob could have changed. + */ + handle->hw_sob = hw_sob; + + /* store the current sob value for unreserve validity check, and + * signal offset support + */ + handle->pre_sob_val = prop->next_sob_val - handle->count; + + *signals_count = prop->next_sob_val; + hdev->asic_funcs->hw_queues_unlock(hdev); + + *sob_addr = handle->hw_sob->sob_addr; + *handle_id = hdl_id; + + dev_dbg(hdev->dev, + "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", + hw_sob->sob_id, handle->hw_sob->sob_addr, + prop->next_sob_val - 1, q_idx, hdl_id); + goto out; + +remove_idr: + spin_lock(&mgr->lock); + idr_remove(&mgr->handles, hdl_id); + spin_unlock(&mgr->lock); + + kfree(handle); +out: + return rc; +} + +static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) +{ + struct hl_cs_encaps_sig_handle *encaps_sig_hdl; + struct hl_sync_stream_properties *prop; + struct hl_device *hdev = hpriv->hdev; + struct hl_encaps_signals_mgr *mgr; + struct hl_hw_sob *hw_sob; + u32 q_idx, sob_addr; + int rc = 0; + + mgr = &hpriv->ctx->sig_mgr; + + spin_lock(&mgr->lock); + encaps_sig_hdl = idr_find(&mgr->handles, handle_id); + if (encaps_sig_hdl) { + dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", + handle_id, encaps_sig_hdl->hw_sob->sob_addr, + encaps_sig_hdl->count); + + hdev->asic_funcs->hw_queues_lock(hdev); + + q_idx = encaps_sig_hdl->q_idx; + prop = &hdev->kernel_queues[q_idx].sync_stream_prop; + hw_sob = &prop->hw_sob[prop->curr_sob_offset]; + sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); + + /* Check if sob_val got out of sync due to other + * signal submission requests which were handled + * between the reserve-unreserve calls or SOB switch + * upon reaching SOB max value. + */ + if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count + != prop->next_sob_val || + sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { + dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", + encaps_sig_hdl->pre_sob_val, + (prop->next_sob_val - encaps_sig_hdl->count)); + + hdev->asic_funcs->hw_queues_unlock(hdev); + rc = -EINVAL; + goto out; + } + + /* + * Decrement the SOB value by count by user request + * to unreserve those signals + */ + prop->next_sob_val -= encaps_sig_hdl->count; + + hdev->asic_funcs->hw_queues_unlock(hdev); + + hw_sob_put(hw_sob); + + /* Release the id and free allocated memory of the handle */ + idr_remove(&mgr->handles, handle_id); + kfree(encaps_sig_hdl); + } else { + rc = -EINVAL; + dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); + } +out: + spin_unlock(&mgr->lock); + + return rc; +} + static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, void __user *chunks, u32 num_chunks, u64 *cs_seq, u32 flags, u32 timeout) { + struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; + bool handle_found = false, is_wait_cs = false, + wait_cs_submitted = false, + cs_encaps_signals = false; struct hl_cs_chunk *cs_chunk_array, *chunk; + bool staged_cs_with_encaps_signals = false; struct hw_queue_properties *hw_queue_prop; struct hl_device *hdev = hpriv->hdev; struct hl_cs_compl *sig_waitcs_cmpl; @@ -1730,11 +1991,58 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, collective_engine_id = chunk->collective_engine_id; } - if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) { - rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx); + is_wait_cs = !!(cs_type == CS_TYPE_WAIT || + cs_type == CS_TYPE_COLLECTIVE_WAIT); + + cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); + + if (is_wait_cs) { + rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, + ctx, cs_encaps_signals); if (rc) goto free_cs_chunk_array; + if (cs_encaps_signals) { + /* check if cs sequence has encapsulated + * signals handle + */ + struct idr *idp; + u32 id; + + spin_lock(&ctx->sig_mgr.lock); + idp = &ctx->sig_mgr.handles; + idr_for_each_entry(idp, encaps_sig_hdl, id) { + if (encaps_sig_hdl->cs_seq == signal_seq) { + handle_found = true; + /* get refcount to protect removing + * this handle from idr, needed when + * multiple wait cs are used with offset + * to wait on reserved encaps signals. + */ + kref_get(&encaps_sig_hdl->refcount); + break; + } + } + spin_unlock(&ctx->sig_mgr.lock); + + if (!handle_found) { + dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", + signal_seq); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + /* validate also the signal offset value */ + if (chunk->encaps_signal_offset > + encaps_sig_hdl->count) { + dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", + chunk->encaps_signal_offset, + encaps_sig_hdl->count); + rc = -EINVAL; + goto free_cs_chunk_array; + } + } + sig_fence = hl_ctx_get_fence(ctx, signal_seq); if (IS_ERR(sig_fence)) { atomic64_inc(&ctx->cs_counters.validation_drop_cnt); @@ -1755,11 +2063,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, sig_waitcs_cmpl = container_of(sig_fence, struct hl_cs_compl, base_fence); - if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + staged_cs_with_encaps_signals = !! + (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && + (flags & HL_CS_FLAGS_ENCAP_SIGNALS)); + + if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && + !staged_cs_with_encaps_signals) { atomic64_inc(&ctx->cs_counters.validation_drop_cnt); atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, - "CS seq 0x%llx is not of a signal CS\n", + "CS seq 0x%llx is not of a signal/encaps-signal CS\n", signal_seq); hl_fence_put(sig_fence); rc = -EINVAL; @@ -1776,18 +2089,27 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); if (rc) { - if (cs_type == CS_TYPE_WAIT || - cs_type == CS_TYPE_COLLECTIVE_WAIT) + if (is_wait_cs) hl_fence_put(sig_fence); + goto free_cs_chunk_array; } /* * Save the signal CS fence for later initialization right before * hanging the wait CS on the queue. + * for encaps signals case, we save the cs sequence and handle pointer + * for later initialization. */ - if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) + if (is_wait_cs) { cs->signal_fence = sig_fence; + /* store the handle pointer, so we don't have to + * look for it again, later on the flow + * when we need to set SOB info in hw_queue. + */ + if (cs->encaps_signals) + cs->encaps_sig_hdl = encaps_sig_hdl; + } hl_debugfs_add_cs(cs); @@ -1795,10 +2117,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, - q_idx); + q_idx, chunk->encaps_signal_offset); else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, - cs, q_idx, collective_engine_id); + cs, q_idx, collective_engine_id, + chunk->encaps_signal_offset); else { atomic64_inc(&ctx->cs_counters.validation_drop_cnt); atomic64_inc(&cntr->validation_drop_cnt); @@ -1810,7 +2133,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, rc = hl_hw_queue_schedule_cs(cs); if (rc) { - if (rc != -EAGAIN) + /* In case wait cs failed here, it means the signal cs + * already completed. we want to free all it's related objects + * but we don't want to fail the ioctl. + */ + if (is_wait_cs) + rc = 0; + else if (rc != -EAGAIN) dev_err(hdev->dev, "Failed to submit CS %d.%llu to H/W queues, error %d\n", ctx->asid, cs->sequence, rc); @@ -1818,6 +2147,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } rc = HL_CS_STATUS_SUCCESS; + if (is_wait_cs) + wait_cs_submitted = true; goto put_cs; free_cs_object: @@ -1828,6 +2159,10 @@ put_cs: /* We finished with the CS in this function, so put the ref */ cs_put(cs); free_cs_chunk_array: + if (!wait_cs_submitted && cs_encaps_signals && handle_found && + is_wait_cs) + kref_put(&encaps_sig_hdl->refcount, + hl_encaps_handle_do_release); kfree(cs_chunk_array); out: return rc; @@ -1836,10 +2171,11 @@ out: int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cs_args *args = data; - enum hl_cs_type cs_type; + enum hl_cs_type cs_type = 0; u64 cs_seq = ULONG_MAX; void __user *chunks; - u32 num_chunks, flags, timeout; + u32 num_chunks, flags, timeout, + signals_count = 0, sob_addr = 0, handle_id = 0; int rc; rc = hl_cs_sanity_checks(hpriv, args); @@ -1850,10 +2186,6 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) if (rc) goto out; - rc = hl_submit_pending_cb(hpriv); - if (rc) - goto out; - cs_type = hl_cs_get_cs_type(args->in.cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE); chunks = (void __user *) (uintptr_t) args->in.chunks_execute; @@ -1876,80 +2208,448 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, &cs_seq, args->in.cs_flags, timeout); break; + case CS_RESERVE_SIGNALS: + rc = cs_ioctl_reserve_signals(hpriv, + args->in.encaps_signals_q_idx, + args->in.encaps_signals_count, + &handle_id, &sob_addr, &signals_count); + break; + case CS_UNRESERVE_SIGNALS: + rc = cs_ioctl_unreserve_signals(hpriv, + args->in.encaps_sig_handle_id); + break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, - args->in.cs_flags, timeout); + args->in.cs_flags, + args->in.encaps_sig_handle_id, + timeout); break; } - out: if (rc != -EAGAIN) { memset(args, 0, sizeof(*args)); + + if (cs_type == CS_RESERVE_SIGNALS) { + args->out.handle_id = handle_id; + args->out.sob_base_addr_offset = sob_addr; + args->out.count = signals_count; + } else { + args->out.seq = cs_seq; + } args->out.status = rc; - args->out.seq = cs_seq; } return rc; } +static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence, + enum hl_cs_wait_status *status, u64 timeout_us, + s64 *timestamp) +{ + struct hl_device *hdev = ctx->hdev; + long completion_rc; + int rc = 0; + + if (IS_ERR(fence)) { + rc = PTR_ERR(fence); + if (rc == -EINVAL) + dev_notice_ratelimited(hdev->dev, + "Can't wait on CS %llu because current CS is at seq %llu\n", + seq, ctx->cs_sequence); + return rc; + } + + if (!fence) { + dev_dbg(hdev->dev, + "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", + seq, ctx->cs_sequence); + + *status = CS_WAIT_STATUS_GONE; + return 0; + } + + if (!timeout_us) { + completion_rc = completion_done(&fence->completion); + } else { + unsigned long timeout; + + timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ? + timeout_us : usecs_to_jiffies(timeout_us); + completion_rc = + wait_for_completion_interruptible_timeout( + &fence->completion, timeout); + } + + if (completion_rc > 0) { + *status = CS_WAIT_STATUS_COMPLETED; + if (timestamp) + *timestamp = ktime_to_ns(fence->timestamp); + } else { + *status = CS_WAIT_STATUS_BUSY; + } + + if (fence->error == -ETIMEDOUT) + rc = -ETIMEDOUT; + else if (fence->error == -EIO) + rc = -EIO; + + return rc; +} + +/* + * hl_cs_poll_fences - iterate CS fences to check for CS completion + * + * @mcs_data: multi-CS internal data + * + * @return 0 on success, otherwise non 0 error code + * + * The function iterates on all CS sequence in the list and set bit in + * completion_bitmap for each completed CS. + * while iterating, the function can extracts the stream map to be later + * used by the waiting function. + * this function shall be called after taking context ref + */ +static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) +{ + struct hl_fence **fence_ptr = mcs_data->fence_arr; + struct hl_device *hdev = mcs_data->ctx->hdev; + int i, rc, arr_len = mcs_data->arr_len; + u64 *seq_arr = mcs_data->seq_arr; + ktime_t max_ktime, first_cs_time; + enum hl_cs_wait_status status; + + memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr)); + + /* get all fences under the same lock */ + rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); + if (rc) + return rc; + + /* + * set to maximum time to verify timestamp is valid: if at the end + * this value is maintained- no timestamp was updated + */ + max_ktime = ktime_set(KTIME_SEC_MAX, 0); + first_cs_time = max_ktime; + + for (i = 0; i < arr_len; i++, fence_ptr++) { + struct hl_fence *fence = *fence_ptr; + + /* + * function won't sleep as it is called with timeout 0 (i.e. + * poll the fence) + */ + rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, + &status, 0, NULL); + if (rc) { + dev_err(hdev->dev, + "wait_for_fence error :%d for CS seq %llu\n", + rc, seq_arr[i]); + break; + } + + mcs_data->stream_master_qid_map |= fence->stream_master_qid_map; + + if (status == CS_WAIT_STATUS_BUSY) + continue; + + mcs_data->completion_bitmap |= BIT(i); + + /* + * best effort to extract timestamp. few notes: + * - if even single fence is gone we cannot extract timestamp + * (as fence not exist anymore) + * - for all completed CSs we take the earliest timestamp. + * for this we have to validate that: + * 1. given timestamp was indeed set + * 2. the timestamp is earliest of all timestamps so far + */ + + if (status == CS_WAIT_STATUS_GONE) { + mcs_data->update_ts = false; + mcs_data->gone_cs = true; + } else if (mcs_data->update_ts && + (ktime_compare(fence->timestamp, + ktime_set(0, 0)) > 0) && + (ktime_compare(fence->timestamp, first_cs_time) < 0)) { + first_cs_time = fence->timestamp; + } + } + + hl_fences_put(mcs_data->fence_arr, arr_len); + + if (mcs_data->update_ts && + (ktime_compare(first_cs_time, max_ktime) != 0)) + mcs_data->timestamp = ktime_to_ns(first_cs_time); + + return rc; +} + static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, enum hl_cs_wait_status *status, s64 *timestamp) { struct hl_fence *fence; - unsigned long timeout; int rc = 0; - long completion_rc; if (timestamp) *timestamp = 0; - if (timeout_us == MAX_SCHEDULE_TIMEOUT) - timeout = timeout_us; - else - timeout = usecs_to_jiffies(timeout_us); - hl_ctx_get(hdev, ctx); fence = hl_ctx_get_fence(ctx, seq); - if (IS_ERR(fence)) { - rc = PTR_ERR(fence); - if (rc == -EINVAL) - dev_notice_ratelimited(hdev->dev, - "Can't wait on CS %llu because current CS is at seq %llu\n", - seq, ctx->cs_sequence); - } else if (fence) { - if (!timeout_us) - completion_rc = completion_done(&fence->completion); - else - completion_rc = - wait_for_completion_interruptible_timeout( - &fence->completion, timeout); - if (completion_rc > 0) { - *status = CS_WAIT_STATUS_COMPLETED; - if (timestamp) - *timestamp = ktime_to_ns(fence->timestamp); - } else { - *status = CS_WAIT_STATUS_BUSY; + rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp); + hl_fence_put(fence); + hl_ctx_put(ctx); + + return rc; +} + +/* + * hl_wait_multi_cs_completion_init - init completion structure + * + * @hdev: pointer to habanalabs device structure + * @stream_master_bitmap: stream master QIDs map, set bit indicates stream + * master QID to wait on + * + * @return valid completion struct pointer on success, otherwise error pointer + * + * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver. + * the function gets the first available completion (by marking it "used") + * and initialize its values. + */ +static struct multi_cs_completion *hl_wait_multi_cs_completion_init( + struct hl_device *hdev, + u8 stream_master_bitmap) +{ + struct multi_cs_completion *mcs_compl; + int i; + + /* find free multi_cs completion structure */ + for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { + mcs_compl = &hdev->multi_cs_completion[i]; + spin_lock(&mcs_compl->lock); + if (!mcs_compl->used) { + mcs_compl->used = 1; + mcs_compl->timestamp = 0; + mcs_compl->stream_master_qid_map = stream_master_bitmap; + reinit_completion(&mcs_compl->completion); + spin_unlock(&mcs_compl->lock); + break; } + spin_unlock(&mcs_compl->lock); + } - if (fence->error == -ETIMEDOUT) - rc = -ETIMEDOUT; - else if (fence->error == -EIO) - rc = -EIO; + if (i == MULTI_CS_MAX_USER_CTX) { + dev_err(hdev->dev, + "no available multi-CS completion structure\n"); + return ERR_PTR(-ENOMEM); + } + return mcs_compl; +} - hl_fence_put(fence); - } else { - dev_dbg(hdev->dev, - "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", - seq, ctx->cs_sequence); - *status = CS_WAIT_STATUS_GONE; +/* + * hl_wait_multi_cs_completion_fini - return completion structure and set as + * unused + * + * @mcs_compl: pointer to the completion structure + */ +static void hl_wait_multi_cs_completion_fini( + struct multi_cs_completion *mcs_compl) +{ + /* + * free completion structure, do it under lock to be in-sync with the + * thread that signals completion + */ + spin_lock(&mcs_compl->lock); + mcs_compl->used = 0; + spin_unlock(&mcs_compl->lock); +} + +/* + * hl_wait_multi_cs_completion - wait for first CS to complete + * + * @mcs_data: multi-CS internal data + * + * @return 0 on success, otherwise non 0 error code + */ +static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data) +{ + struct hl_device *hdev = mcs_data->ctx->hdev; + struct multi_cs_completion *mcs_compl; + long completion_rc; + + mcs_compl = hl_wait_multi_cs_completion_init(hdev, + mcs_data->stream_master_qid_map); + if (IS_ERR(mcs_compl)) + return PTR_ERR(mcs_compl); + + completion_rc = wait_for_completion_interruptible_timeout( + &mcs_compl->completion, + usecs_to_jiffies(mcs_data->timeout_us)); + + /* update timestamp */ + if (completion_rc > 0) + mcs_data->timestamp = mcs_compl->timestamp; + + hl_wait_multi_cs_completion_fini(mcs_compl); + + mcs_data->wait_status = completion_rc; + + return 0; +} + +/* + * hl_multi_cs_completion_init - init array of multi-CS completion structures + * + * @hdev: pointer to habanalabs device structure + */ +void hl_multi_cs_completion_init(struct hl_device *hdev) +{ + struct multi_cs_completion *mcs_cmpl; + int i; + + for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { + mcs_cmpl = &hdev->multi_cs_completion[i]; + mcs_cmpl->used = 0; + spin_lock_init(&mcs_cmpl->lock); + init_completion(&mcs_cmpl->completion); + } +} + +/* + * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl + * + * @hpriv: pointer to the private data of the fd + * @data: pointer to multi-CS wait ioctl in/out args + * + */ +static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_device *hdev = hpriv->hdev; + struct multi_cs_data mcs_data = {0}; + union hl_wait_cs_args *args = data; + struct hl_ctx *ctx = hpriv->ctx; + struct hl_fence **fence_arr; + void __user *seq_arr; + u32 size_to_copy; + u64 *cs_seq_arr; + u8 seq_arr_len; + int rc; + + if (!hdev->supports_wait_for_multi_cs) { + dev_err(hdev->dev, "Wait for multi CS is not supported\n"); + return -EPERM; + } + + seq_arr_len = args->in.seq_arr_len; + + if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) { + dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", + HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len); + return -EINVAL; + } + + /* allocate memory for sequence array */ + cs_seq_arr = + kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL); + if (!cs_seq_arr) + return -ENOMEM; + + /* copy CS sequence array from user */ + seq_arr = (void __user *) (uintptr_t) args->in.seq; + size_to_copy = seq_arr_len * sizeof(*cs_seq_arr); + if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) { + dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); + rc = -EFAULT; + goto free_seq_arr; + } + + /* allocate array for the fences */ + fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL); + if (!fence_arr) { + rc = -ENOMEM; + goto free_seq_arr; + } + + /* initialize the multi-CS internal data */ + mcs_data.ctx = ctx; + mcs_data.seq_arr = cs_seq_arr; + mcs_data.fence_arr = fence_arr; + mcs_data.arr_len = seq_arr_len; + + hl_ctx_get(hdev, ctx); + + /* poll all CS fences, extract timestamp */ + mcs_data.update_ts = true; + rc = hl_cs_poll_fences(&mcs_data); + /* + * skip wait for CS completion when one of the below is true: + * - an error on the poll function + * - one or more CS in the list completed + * - the user called ioctl with timeout 0 + */ + if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) + goto put_ctx; + + /* wait (with timeout) for the first CS to be completed */ + mcs_data.timeout_us = args->in.timeout_us; + rc = hl_wait_multi_cs_completion(&mcs_data); + if (rc) + goto put_ctx; + + if (mcs_data.wait_status > 0) { + /* + * poll fences once again to update the CS map. + * no timestamp should be updated this time. + */ + mcs_data.update_ts = false; + rc = hl_cs_poll_fences(&mcs_data); + + /* + * if hl_wait_multi_cs_completion returned before timeout (i.e. + * it got a completion) we expect to see at least one CS + * completed after the poll function. + */ + if (!mcs_data.completion_bitmap) { + dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n"); + rc = -EFAULT; + } } +put_ctx: hl_ctx_put(ctx); + kfree(fence_arr); - return rc; +free_seq_arr: + kfree(cs_seq_arr); + + /* update output args */ + memset(args, 0, sizeof(*args)); + if (rc) + return rc; + + if (mcs_data.completion_bitmap) { + args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + args->out.cs_completion_map = mcs_data.completion_bitmap; + + /* if timestamp not 0- it's valid */ + if (mcs_data.timestamp) { + args->out.timestamp_nsec = mcs_data.timestamp; + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; + } + + /* update if some CS was gone */ + if (mcs_data.timestamp) + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; + } else if (mcs_data.wait_status == -ERESTARTSYS) { + args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; + } else { + args->out.status = HL_WAIT_CS_STATUS_BUSY; + } + + return 0; } static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) @@ -2015,9 +2715,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, { struct hl_user_pending_interrupt *pend; struct hl_user_interrupt *interrupt; - unsigned long timeout; - long completion_rc; + unsigned long timeout, flags; u32 completion_value; + long completion_rc; int rc = 0; if (timeout_us == U32_MAX) @@ -2040,17 +2740,10 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, else interrupt = &hdev->user_interrupt[interrupt_offset]; - spin_lock(&interrupt->wait_list_lock); - if (!hl_device_operational(hdev, NULL)) { - rc = -EPERM; - goto unlock_and_free_fence; - } - if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { - dev_err(hdev->dev, - "Failed to copy completion value from user\n"); + dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; - goto unlock_and_free_fence; + goto free_fence; } if (completion_value >= target_value) @@ -2059,48 +2752,57 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, *status = CS_WAIT_STATUS_BUSY; if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) - goto unlock_and_free_fence; + goto free_fence; /* Add pending user interrupt to relevant list for the interrupt * handler to monitor */ + spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); - spin_unlock(&interrupt->wait_list_lock); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); wait_again: /* Wait for interrupt handler to signal completion */ - completion_rc = - wait_for_completion_interruptible_timeout( - &pend->fence.completion, timeout); + completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, + timeout); /* If timeout did not expire we need to perform the comparison. * If comparison fails, keep waiting until timeout expires */ if (completion_rc > 0) { - if (copy_from_user(&completion_value, - u64_to_user_ptr(user_address), 4)) { - dev_err(hdev->dev, - "Failed to copy completion value from user\n"); + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; + goto remove_pending_user_interrupt; } if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; } else { + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + reinit_completion(&pend->fence.completion); timeout = completion_rc; + + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); goto wait_again; } + } else if (completion_rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for interrupt ID %d\n", + interrupt->interrupt_id); + *status = HL_WAIT_CS_STATUS_INTERRUPTED; + rc = -EINTR; } else { *status = CS_WAIT_STATUS_BUSY; } remove_pending_user_interrupt: - spin_lock(&interrupt->wait_list_lock); + spin_lock_irqsave(&interrupt->wait_list_lock, flags); list_del(&pend->wait_list_node); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); -unlock_and_free_fence: - spin_unlock(&interrupt->wait_list_lock); +free_fence: kfree(pend); hl_ctx_put(ctx); @@ -2148,8 +2850,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) memset(args, 0, sizeof(*args)); if (rc) { - dev_err_ratelimited(hdev->dev, - "interrupt_wait_ioctl failed (%d)\n", rc); + if (rc != -EINTR) + dev_err_ratelimited(hdev->dev, + "interrupt_wait_ioctl failed (%d)\n", rc); return rc; } @@ -2173,8 +2876,16 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) u32 flags = args->in.flags; int rc; + /* If the device is not operational, no point in waiting for any command submission or + * user interrupt + */ + if (!hl_device_operational(hpriv->hdev, NULL)) + return -EPERM; + if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) rc = hl_interrupt_wait_ioctl(hpriv, data); + else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) + rc = hl_multi_cs_wait_ioctl(hpriv, data); else rc = hl_cs_wait_ioctl(hpriv, data); diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 19b6b045219e..22978303ad63 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -9,16 +9,70 @@ #include <linux/slab.h> +void hl_encaps_handle_do_release(struct kref *ref) +{ + struct hl_cs_encaps_sig_handle *handle = + container_of(ref, struct hl_cs_encaps_sig_handle, refcount); + struct hl_ctx *ctx = handle->hdev->compute_ctx; + struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr; + + spin_lock(&mgr->lock); + idr_remove(&mgr->handles, handle->id); + spin_unlock(&mgr->lock); + + kfree(handle); +} + +static void hl_encaps_handle_do_release_sob(struct kref *ref) +{ + struct hl_cs_encaps_sig_handle *handle = + container_of(ref, struct hl_cs_encaps_sig_handle, refcount); + struct hl_ctx *ctx = handle->hdev->compute_ctx; + struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr; + + /* if we're here, then there was a signals reservation but cs with + * encaps signals wasn't submitted, so need to put refcount + * to hw_sob taken at the reservation. + */ + hw_sob_put(handle->hw_sob); + + spin_lock(&mgr->lock); + idr_remove(&mgr->handles, handle->id); + spin_unlock(&mgr->lock); + + kfree(handle); +} + +static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr) +{ + spin_lock_init(&mgr->lock); + idr_init(&mgr->handles); +} + +static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, + struct hl_encaps_signals_mgr *mgr) +{ + struct hl_cs_encaps_sig_handle *handle; + struct idr *idp; + u32 id; + + idp = &mgr->handles; + + if (!idr_is_empty(idp)) { + dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n"); + idr_for_each_entry(idp, handle, id) + kref_put(&handle->refcount, + hl_encaps_handle_do_release_sob); + } + + idr_destroy(&mgr->handles); +} + static void hl_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; int i; - /* Release all allocated pending cb's, those cb's were never - * scheduled so it is safe to release them here - */ - hl_pending_cb_list_flush(ctx); - /* Release all allocated HW block mapped list entries and destroy * the mutex. */ @@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) hl_cb_va_pool_fini(ctx); hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); + hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr); /* Scrub both SRAM and DRAM */ hdev->asic_funcs->scrub_device_mem(hdev, 0, 0); @@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) { if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) return; - - dev_warn(hdev->dev, - "user process released device but its command submissions are still executing\n"); } int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) @@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) kref_init(&ctx->refcount); ctx->cs_sequence = 1; - INIT_LIST_HEAD(&ctx->pending_cb_list); - spin_lock_init(&ctx->pending_cb_lock); spin_lock_init(&ctx->cs_lock); atomic_set(&ctx->thread_ctx_switch_token, 1); - atomic_set(&ctx->thread_pending_cb_token, 1); ctx->thread_ctx_switch_wait_token = 0; ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, sizeof(struct hl_fence *), @@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) goto err_cb_va_pool_fini; } + hl_encaps_sig_mgr_init(&ctx->sig_mgr); + dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); } @@ -229,25 +280,40 @@ int hl_ctx_put(struct hl_ctx *ctx) return kref_put(&ctx->refcount, hl_ctx_do_release); } -struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) +/* + * hl_ctx_get_fence_locked - get CS fence under CS lock + * + * @ctx: pointer to the context structure. + * @seq: CS sequences number + * + * @return valid fence pointer on success, NULL if fence is gone, otherwise + * error pointer. + * + * NOTE: this function shall be called with cs_lock locked + */ +static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq) { struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; struct hl_fence *fence; - spin_lock(&ctx->cs_lock); - - if (seq >= ctx->cs_sequence) { - spin_unlock(&ctx->cs_lock); + if (seq >= ctx->cs_sequence) return ERR_PTR(-EINVAL); - } - if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { - spin_unlock(&ctx->cs_lock); + if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) return NULL; - } fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]; hl_fence_get(fence); + return fence; +} + +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) +{ + struct hl_fence *fence; + + spin_lock(&ctx->cs_lock); + + fence = hl_ctx_get_fence_locked(ctx, seq); spin_unlock(&ctx->cs_lock); @@ -255,6 +321,46 @@ struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) } /* + * hl_ctx_get_fences - get multiple CS fences under the same CS lock + * + * @ctx: pointer to the context structure. + * @seq_arr: array of CS sequences to wait for + * @fence: fence array to store the CS fences + * @arr_len: length of seq_arr and fence_arr + * + * @return 0 on success, otherwise non 0 error code + */ +int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, + struct hl_fence **fence, u32 arr_len) +{ + struct hl_fence **fence_arr_base = fence; + int i, rc = 0; + + spin_lock(&ctx->cs_lock); + + for (i = 0; i < arr_len; i++, fence++) { + u64 seq = seq_arr[i]; + + *fence = hl_ctx_get_fence_locked(ctx, seq); + + if (IS_ERR(*fence)) { + dev_err(ctx->hdev->dev, + "Failed to get fence for CS with seq 0x%llx\n", + seq); + rc = PTR_ERR(*fence); + break; + } + } + + spin_unlock(&ctx->cs_lock); + + if (rc) + hl_fences_put(fence_arr_base, i); + + return rc; +} + +/* * hl_ctx_mgr_init - initialize the context manager * * @mgr: pointer to context manager structure diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 703d79fb6f3f..985f1f3dbd20 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data) if (first) { first = false; seq_puts(s, "\n"); - seq_puts(s, " user virtual address size dma dir\n"); + seq_puts(s, " pid user virtual address size dma dir\n"); seq_puts(s, "----------------------------------------------------------\n"); } - seq_printf(s, - " 0x%-14llx %-10u %-30s\n", - userptr->addr, userptr->size, dma_dir[userptr->dir]); + seq_printf(s, " %-7d 0x%-14llx %-10llu %-30s\n", + userptr->pid, userptr->addr, userptr->size, + dma_dir[userptr->dir]); } spin_unlock(&dev_entry->userptr_spinlock); @@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data) struct hl_vm_hash_node *hnode; struct hl_userptr *userptr; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; - enum vm_type_t *vm_type; + enum vm_type *vm_type; bool once = true; u64 j; int i; @@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data) if (*vm_type == VM_TYPE_USERPTR) { userptr = hnode->ptr; seq_printf(s, - " 0x%-14llx %-10u\n", + " 0x%-14llx %-10llu\n", hnode->vaddr, userptr->size); } else { phys_pg_pack = hnode->ptr; @@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data) return 0; } +static int userptr_lookup_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct scatterlist *sg; + struct hl_userptr *userptr; + bool first = true; + u64 total_npages, npages, sg_start, sg_end; + dma_addr_t dma_addr; + int i; + + spin_lock(&dev_entry->userptr_spinlock); + + list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) { + if (dev_entry->userptr_lookup >= userptr->addr && + dev_entry->userptr_lookup < userptr->addr + userptr->size) { + total_npages = 0; + for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, + i) { + npages = hl_get_sg_info(sg, &dma_addr); + sg_start = userptr->addr + + total_npages * PAGE_SIZE; + sg_end = userptr->addr + + (total_npages + npages) * PAGE_SIZE; + + if (dev_entry->userptr_lookup >= sg_start && + dev_entry->userptr_lookup < sg_end) { + dma_addr += (dev_entry->userptr_lookup - + sg_start); + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " user virtual address dma address pid region start region size\n"); + seq_puts(s, "---------------------------------------------------------------------------------------\n"); + } + seq_printf(s, " 0x%-18llx 0x%-16llx %-8u 0x%-16llx %-12llu\n", + dev_entry->userptr_lookup, + (u64)dma_addr, userptr->pid, + userptr->addr, userptr->size); + } + total_npages += npages; + } + } + } + + spin_unlock(&dev_entry->userptr_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static ssize_t userptr_lookup_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + ssize_t rc; + u64 value; + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + dev_entry->userptr_lookup = value; + + return count; +} + static int mmu_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; @@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } - phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val; + hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr); if (hops_info.scrambled_vaddr && (dev_entry->mmu_addr != hops_info.scrambled_vaddr)) @@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_ctx *ctx = hdev->compute_ctx; struct hl_vm_hash_node *hnode; + u64 end_address, range_size; struct hl_userptr *userptr; - enum vm_type_t *vm_type; + enum vm_type *vm_type; bool valid = false; - u64 end_address; - u32 range_size; int i, rc = 0; if (!ctx) { @@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf, return 0; } +static ssize_t hl_state_dump_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + ssize_t rc; + + down_read(&entry->state_dump_sem); + if (!entry->state_dump[entry->state_dump_head]) + rc = 0; + else + rc = simple_read_from_buffer( + buf, count, ppos, + entry->state_dump[entry->state_dump_head], + strlen(entry->state_dump[entry->state_dump_head])); + up_read(&entry->state_dump_sem); + + return rc; +} + +static ssize_t hl_state_dump_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + ssize_t rc; + u32 size; + int i; + + rc = kstrtouint_from_user(buf, count, 10, &size); + if (rc) + return rc; + + if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) { + dev_err(hdev->dev, "Invalid number of dumps to skip\n"); + return -EINVAL; + } + + if (entry->state_dump[entry->state_dump_head]) { + down_write(&entry->state_dump_sem); + for (i = 0; i < size; ++i) { + vfree(entry->state_dump[entry->state_dump_head]); + entry->state_dump[entry->state_dump_head] = NULL; + if (entry->state_dump_head > 0) + entry->state_dump_head--; + else + entry->state_dump_head = + ARRAY_SIZE(entry->state_dump) - 1; + } + up_write(&entry->state_dump_sem); + } + + return count; +} + static const struct file_operations hl_data32b_fops = { .owner = THIS_MODULE, .read = hl_data_read32, @@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = { .read = hl_security_violations_read }; +static const struct file_operations hl_state_dump_fops = { + .owner = THIS_MODULE, + .read = hl_state_dump_read, + .write = hl_state_dump_write +}; + static const struct hl_info_list hl_debugfs_list[] = { {"command_buffers", command_buffers_show, NULL}, {"command_submission", command_submission_show, NULL}, {"command_submission_jobs", command_submission_jobs_show, NULL}, {"userptr", userptr_show, NULL}, {"vm", vm_show, NULL}, + {"userptr_lookup", userptr_lookup_show, userptr_lookup_write}, {"mmu", mmu_show, mmu_asid_va_write}, {"engines", engines_show, NULL} }; @@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) INIT_LIST_HEAD(&dev_entry->userptr_list); INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); mutex_init(&dev_entry->file_mutex); + init_rwsem(&dev_entry->state_dump_sem); spin_lock_init(&dev_entry->cb_spinlock); spin_lock_init(&dev_entry->cs_spinlock); spin_lock_init(&dev_entry->cs_job_spinlock); @@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry->root, &hdev->skip_reset_on_timeout); + debugfs_create_file("state_dump", + 0600, + dev_entry->root, + dev_entry, + &hl_state_dump_fops); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, @@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev) void hl_debugfs_remove_device(struct hl_device *hdev) { struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; + int i; debugfs_remove_recursive(entry->root); @@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev) vfree(entry->blob_desc.data); + for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i) + vfree(entry->state_dump[i]); + kfree(entry->entry_arr); } @@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) spin_unlock(&dev_entry->ctx_mem_hash_spinlock); } +/** + * hl_debugfs_set_state_dump - register state dump making it accessible via + * debugfs + * @hdev: pointer to the device structure + * @data: the actual dump data + * @length: the length of the data + */ +void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, + unsigned long length) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + down_write(&dev_entry->state_dump_sem); + + dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) % + ARRAY_SIZE(dev_entry->state_dump); + vfree(dev_entry->state_dump[dev_entry->state_dump_head]); + dev_entry->state_dump[dev_entry->state_dump_head] = data; + + up_write(&dev_entry->state_dump_sem); +} + void __init hl_debugfs_init(void) { hl_debug_root = debugfs_create_dir("habanalabs", NULL); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index ff4cbde289c0..97c7c86580e6 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -7,11 +7,11 @@ #define pr_fmt(fmt) "habanalabs: " fmt +#include <uapi/misc/habanalabs.h> #include "habanalabs.h" #include <linux/pci.h> #include <linux/hwmon.h> -#include <uapi/misc/habanalabs.h> enum hl_device_status hl_device_status(struct hl_device *hdev) { @@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) status = HL_DEVICE_STATUS_NEEDS_RESET; else if (hdev->disabled) status = HL_DEVICE_STATUS_MALFUNCTION; + else if (!hdev->init_done) + status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; else status = HL_DEVICE_STATUS_OPERATIONAL; @@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev, case HL_DEVICE_STATUS_NEEDS_RESET: return false; case HL_DEVICE_STATUS_OPERATIONAL: + case HL_DEVICE_STATUS_IN_DEVICE_CREATION: default: return true; } @@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp) hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); if (!hl_hpriv_put(hpriv)) - dev_warn(hdev->dev, - "Device is still in use because there are live CS and/or memory mappings\n"); + dev_notice(hdev->dev, + "User process closed FD but device still in use\n"); hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; @@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work) container_of(work, struct hl_device_reset_work, reset_work.work); struct hl_device *hdev = device_reset_work->hdev; + u32 flags; int rc; - rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD); + flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD; + + if (device_reset_work->fw_reset) + flags |= HL_RESET_FW; + + rc = hl_device_reset(hdev, flags); if ((rc == -EBUSY) && !hdev->device_fini_pending) { dev_info(hdev->dev, "Could not reset device. will try again in %u seconds", @@ -682,6 +691,44 @@ out: return rc; } +static void take_release_locks(struct hl_device *hdev) +{ + /* Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush processes that are sending message to CPU */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); +} + +static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) +{ + if (hard_reset) + device_late_fini(hdev); + + /* + * Halt the engines and disable interrupts so we won't get any more + * completions from H/W and we won't have any accesses from the + * H/W to the host machine + */ + hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); + + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + + /* Release all pending user interrupts, each pending user interrupt + * holds a reference to user context + */ + hl_release_pending_user_interrupts(hdev); +} + /* * hl_device_suspend - initiate device suspend * @@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev) /* This blocks all other stuff that is not blocked by in_reset */ hdev->disabled = true; - /* - * Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush processes that are sending message to CPU */ - mutex_lock(&hdev->send_cpu_message_lock); - mutex_unlock(&hdev->send_cpu_message_lock); + take_release_locks(hdev); rc = hdev->asic_funcs->suspend(hdev); if (rc) @@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) usleep_range(1000, 10000); put_task_struct(task); + } else { + dev_warn(hdev->dev, + "Can't get task struct for PID so giving up on killing process\n"); + mutex_unlock(&hdev->fpriv_list_lock); + return -ETIME; } } @@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev) int hl_device_reset(struct hl_device *hdev, u32 flags) { u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; - bool hard_reset, from_hard_reset_thread, hard_instead_soft = false; + bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false; int i, rc; if (!hdev->init_done) { @@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) return 0; } - hard_reset = (flags & HL_RESET_HARD) != 0; - from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0; + hard_reset = !!(flags & HL_RESET_HARD); + from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD); + fw_reset = !!(flags & HL_RESET_FW); if (!hard_reset && !hdev->supports_soft_reset) { hard_instead_soft = true; @@ -947,11 +991,13 @@ do_reset: else hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; - /* - * if reset is due to heartbeat, device CPU is no responsive in - * which case no point sending PCI disable message to it + /* If reset is due to heartbeat, device CPU is no responsive in + * which case no point sending PCI disable message to it. + * + * If F/W is performing the reset, no need to send it a message to disable + * PCI access */ - if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) { + if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) { /* Disable PCI access from device F/W so he won't send * us additional interrupts. We disable MSI/MSI-X at * the halt_engines function and we can't have the F/W @@ -970,15 +1016,7 @@ do_reset: /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; - /* Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush anyone that is inside device open */ - mutex_lock(&hdev->fpriv_list_lock); - mutex_unlock(&hdev->fpriv_list_lock); + take_release_locks(hdev); dev_err(hdev->dev, "Going to RESET device!\n"); } @@ -989,6 +1027,8 @@ again: hdev->process_kill_trial_cnt = 0; + hdev->device_reset_work.fw_reset = fw_reset; + /* * Because the reset function can't run from heartbeat work, * we need to call the reset function from a dedicated work. @@ -999,31 +1039,7 @@ again: return 0; } - if (hard_reset) { - device_late_fini(hdev); - - /* - * Now that the heartbeat thread is closed, flush processes - * which are sending messages to CPU - */ - mutex_lock(&hdev->send_cpu_message_lock); - mutex_unlock(&hdev->send_cpu_message_lock); - } - - /* - * Halt the engines and disable interrupts so we won't get any more - * completions from H/W and we won't have any accesses from the - * H/W to the host machine - */ - hdev->asic_funcs->halt_engines(hdev, hard_reset); - - /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); - - /* Release all pending user interrupts, each pending user interrupt - * holds a reference to user context - */ - hl_release_pending_user_interrupts(hdev); + cleanup_resources(hdev, hard_reset, fw_reset); kill_processes: if (hard_reset) { @@ -1057,12 +1073,15 @@ kill_processes: } /* Reset the H/W. It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, hard_reset); + hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); if (hard_reset) { + hdev->fw_loader.linux_loaded = false; + /* Release kernel context */ if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) hdev->kernel_ctx = NULL; + hl_vm_fini(hdev); hl_mmu_fini(hdev); hl_eq_reset(hdev, &hdev->event_queue); @@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto user_interrupts_fini; + + /* initialize completion structure for multi CS wait */ + hl_multi_cs_completion_init(hdev); + /* * Initialize the H/W queues. Must be done before hw_init, because * there the addresses of the kernel queue are being written to the @@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->compute_ctx = NULL; + hdev->asic_funcs->state_dump_init(hdev); + hl_debugfs_add_device(hdev); /* debugfs nodes are created in hl_ctx_init so it must be called after @@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev) /* Mark device as disabled */ hdev->disabled = true; - /* Flush anyone that is inside the critical section of enqueue - * jobs to the H/W - */ - hdev->asic_funcs->hw_queues_lock(hdev); - hdev->asic_funcs->hw_queues_unlock(hdev); - - /* Flush anyone that is inside device open */ - mutex_lock(&hdev->fpriv_list_lock); - mutex_unlock(&hdev->fpriv_list_lock); + take_release_locks(hdev); hdev->hard_reset_pending = true; hl_hwmon_fini(hdev); - device_late_fini(hdev); - - /* - * Halt the engines and disable interrupts so we won't get any more - * completions from H/W and we won't have any accesses from the - * H/W to the host machine - */ - hdev->asic_funcs->halt_engines(hdev, true); - - /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); + cleanup_resources(hdev, true, false); /* Kill processes here after CS rollback. This is because the process * can't really exit until all its CSs are done, which is what we @@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev) hl_cb_pool_fini(hdev); /* Reset the H/W. It will be in idle state after this returns */ - hdev->asic_funcs->hw_fini(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true, false); + + hdev->fw_loader.linux_loaded = false; /* Release kernel context */ if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 2e4d04ec6b53..8d2568c63f19 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, /* set fence to a non valid value */ pkt->fence = cpu_to_le32(UINT_MAX); - rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); - if (rc) { - dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); - goto out; - } + /* + * The CPU queue is a synchronous queue with an effective depth of + * a single entry (although it is allocated with room for multiple + * entries). We lock on it using 'send_cpu_message_lock' which + * serializes accesses to the CPU queue. + * Which means that we don't need to lock the access to the entire H/W + * queues module when submitting a JOB to the CPU queue. + */ + hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) expected_ack_val = queue->pi; @@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev, hdev->event_queue.check_eqe_index = false; /* Read FW application security bits again */ - if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) { - hdev->asic_prop.fw_app_cpu_boot_dev_sts0 = - RREG32(sts_boot_dev_sts0_reg); - if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & + if (prop->fw_cpu_boot_dev_sts0_valid) { + prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg); + if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_EQ_INDEX_EN) hdev->event_queue.check_eqe_index = true; } - if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid) - hdev->asic_prop.fw_app_cpu_boot_dev_sts1 = - RREG32(sts_boot_dev_sts1_reg); + if (prop->fw_cpu_boot_dev_sts1_valid) + prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg); out: hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, @@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) } else { WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE); msleep(static_loader->cpu_reset_wait_msec); + + /* Must clear this register in order to prevent preboot + * from reading WFE after reboot + */ + WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA); } hdev->device_cpu_is_halted = true; @@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) dev_err(hdev->dev, "Device boot progress - Thermal Sensor initialization failed\n"); break; + case CPU_BOOT_STATUS_SECURITY_READY: + dev_err(hdev->dev, + "Device boot progress - Stuck in preboot after security initialization\n"); + break; default: dev_err(hdev->dev, "Device boot progress - Invalid status code %d\n", @@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev) * b. Check whether hard reset is done by boot cpu * 3. FW application - a. Fetch fw application security status * b. Check whether hard reset is done by fw app - * - * Preboot: - * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then- - * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) - * If set, then mark GIC controller to be disabled. */ prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); @@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev) if (!hdev->asic_prop.gic_interrupts_enable && !(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) { - dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl; - dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl; + dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq; + dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq; dev_warn(hdev->dev, "Using a single interrupt interface towards cpucp"); @@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, /* Read FW application security bits */ if (prop->fw_cpu_boot_dev_sts0_valid) { - prop->fw_app_cpu_boot_dev_sts0 = - RREG32(cpu_boot_dev_sts0_reg); + prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg); if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) @@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, } if (prop->fw_cpu_boot_dev_sts1_valid) { - prop->fw_app_cpu_boot_dev_sts1 = - RREG32(cpu_boot_dev_sts1_reg); + prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg); dev_dbg(hdev->dev, "Firmware application CPU status1 %#x\n", @@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, dev_info(hdev->dev, "Loading firmware to device, may take some time...\n"); + /* + * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values! + * It will be updated from FW after hl_fw_dynamic_request_descriptor(). + */ dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6b3cdd7e068a..bebebcb163ee 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -20,6 +20,7 @@ #include <linux/scatterlist.h> #include <linux/hashtable.h> #include <linux/debugfs.h> +#include <linux/rwsem.h> #include <linux/bitfield.h> #include <linux/genalloc.h> #include <linux/sched/signal.h> @@ -65,6 +66,11 @@ #define HL_COMMON_USER_INTERRUPT_ID 0xFFF +#define HL_STATE_DUMP_HIST_LEN 5 + +#define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ + /* Memory */ #define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ @@ -122,12 +128,17 @@ enum hl_mmu_page_table_location { * * - HL_RESET_DEVICE_RELEASE * Set if reset is due to device release + * + * - HL_RESET_FW + * F/W will perform the reset. No need to ask it to reset the device. This is relevant + * only when running with secured f/w */ #define HL_RESET_HARD (1 << 0) #define HL_RESET_FROM_RESET_THREAD (1 << 1) #define HL_RESET_HEARTBEAT (1 << 2) #define HL_RESET_TDR (1 << 3) #define HL_RESET_DEVICE_RELEASE (1 << 4) +#define HL_RESET_FW (1 << 5) #define HL_MAX_SOBS_PER_MONITOR 8 @@ -236,7 +247,9 @@ enum hl_cs_type { CS_TYPE_DEFAULT, CS_TYPE_SIGNAL, CS_TYPE_WAIT, - CS_TYPE_COLLECTIVE_WAIT + CS_TYPE_COLLECTIVE_WAIT, + CS_RESERVE_SIGNALS, + CS_UNRESERVE_SIGNALS }; /* @@ -281,13 +294,17 @@ enum queue_cb_alloc_flags { * @hdev: habanalabs device structure. * @kref: refcount of this SOB. The SOB will reset once the refcount is zero. * @sob_id: id of this SOB. + * @sob_addr: the sob offset from the base address. * @q_idx: the H/W queue that uses this SOB. + * @need_reset: reset indication set when switching to the other sob. */ struct hl_hw_sob { struct hl_device *hdev; struct kref kref; u32 sob_id; + u32 sob_addr; u32 q_idx; + bool need_reset; }; enum hl_collective_mode { @@ -317,11 +334,11 @@ struct hw_queue_properties { }; /** - * enum vm_type_t - virtual memory mapping request information. + * enum vm_type - virtual memory mapping request information. * @VM_TYPE_USERPTR: mapping of user memory to device virtual address. * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. */ -enum vm_type_t { +enum vm_type { VM_TYPE_USERPTR = 0x1, VM_TYPE_PHYS_PACK = 0x2 }; @@ -382,6 +399,16 @@ struct hl_mmu_properties { }; /** + * struct hl_hints_range - hint addresses reserved va range. + * @start_addr: start address of the va range. + * @end_addr: end address of the va range. + */ +struct hl_hints_range { + u64 start_addr; + u64 end_addr; +}; + +/** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g. @@ -392,6 +419,10 @@ struct hl_mmu_properties { * @pmmu: PCI (host) MMU address translation properties. * @pmmu_huge: PCI (host) MMU address translation properties for memory * allocated with huge pages. + * @hints_dram_reserved_va_range: dram hint addresses reserved range. + * @hints_host_reserved_va_range: host hint addresses reserved range. + * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved + * range. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -412,6 +443,10 @@ struct hl_mmu_properties { * to the device's MMU. * @cb_va_end_addr: virtual end address of command buffers which are mapped to * the device's MMU. + * @dram_hints_align_mask: dram va hint addresses alignment mask which is used + * for hints validity check. + * device_dma_offset_for_host_access: the offset to add to host DMA addresses + * to enable the device to access them. * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. * @mmu_hop_table_size: MMU hop table size. @@ -459,6 +494,8 @@ struct hl_mmu_properties { * reserved for the user * @first_available_cq: first available CQ for the user. * @user_interrupt_count: number of user interrupts. + * @server_type: Server type that the ASIC is currently installed in. + * The value is according to enum hl_server_type in uapi file. * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_enabled: true if security measures are enabled in firmware, @@ -470,6 +507,7 @@ struct hl_mmu_properties { * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow * @num_functional_hbms: number of functional HBMs in each DCORE. + * @hints_range_reservation: device support hint addresses range reservation. * @iatu_done_by_fw: true if iATU configuration is being done by FW. * @dynamic_fw_load: is dynamic FW load is supported. * @gic_interrupts_enable: true if FW is not blocking GIC controller, @@ -483,6 +521,9 @@ struct asic_fixed_properties { struct hl_mmu_properties dmmu; struct hl_mmu_properties pmmu; struct hl_mmu_properties pmmu_huge; + struct hl_hints_range hints_dram_reserved_va_range; + struct hl_hints_range hints_host_reserved_va_range; + struct hl_hints_range hints_host_hpage_reserved_va_range; u64 sram_base_address; u64 sram_end_address; u64 sram_user_base_address; @@ -500,6 +541,8 @@ struct asic_fixed_properties { u64 mmu_dram_default_page_addr; u64 cb_va_start_addr; u64 cb_va_end_addr; + u64 dram_hints_align_mask; + u64 device_dma_offset_for_host_access; u32 mmu_pgt_size; u32 mmu_pte_size; u32 mmu_hop_table_size; @@ -534,6 +577,7 @@ struct asic_fixed_properties { u16 first_available_user_msix_interrupt; u16 first_available_cq[HL_MAX_DCORES]; u16 user_interrupt_count; + u16 server_type; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_enabled; @@ -542,6 +586,7 @@ struct asic_fixed_properties { u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; u8 num_functional_hbms; + u8 hints_range_reservation; u8 iatu_done_by_fw; u8 dynamic_fw_load; u8 gic_interrupts_enable; @@ -552,40 +597,45 @@ struct asic_fixed_properties { * @completion: fence is implemented using completion * @refcount: refcount for this fence * @cs_sequence: sequence of the corresponding command submission + * @stream_master_qid_map: streams masters QID bitmap to represent all streams + * masters QIDs that multi cs is waiting on * @error: mark this fence with error * @timestamp: timestamp upon completion - * */ struct hl_fence { struct completion completion; struct kref refcount; u64 cs_sequence; + u32 stream_master_qid_map; int error; ktime_t timestamp; }; /** * struct hl_cs_compl - command submission completion object. - * @sob_reset_work: workqueue object to run SOB reset flow. * @base_fence: hl fence object. * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. * @hw_sob: the H/W SOB used in this signal/wait CS. + * @encaps_sig_hdl: encaps signals hanlder. * @cs_seq: command submission sequence number. * @type: type of the CS - signal/wait. * @sob_val: the SOB value that is used in this signal/wait CS. * @sob_group: the SOB group that is used in this collective wait CS. + * @encaps_signals: indication whether it's a completion object of cs with + * encaps signals or not. */ struct hl_cs_compl { - struct work_struct sob_reset_work; struct hl_fence base_fence; spinlock_t lock; struct hl_device *hdev; struct hl_hw_sob *hw_sob; + struct hl_cs_encaps_sig_handle *encaps_sig_hdl; u64 cs_seq; enum hl_cs_type type; u16 sob_val; u16 sob_group; + bool encaps_signals; }; /* @@ -698,6 +748,17 @@ struct hl_sync_stream_properties { }; /** + * struct hl_encaps_signals_mgr - describes sync stream encapsulated signals + * handlers manager + * @lock: protects handles. + * @handles: an idr to hold all encapsulated signals handles. + */ +struct hl_encaps_signals_mgr { + spinlock_t lock; + struct idr handles; +}; + +/** * struct hl_hw_queue - describes a H/W transport queue. * @shadow_queue: pointer to a shadow queue that holds pointers to jobs. * @sync_stream_prop: sync stream queue properties @@ -875,7 +936,7 @@ struct pci_mem_region { u64 region_base; u64 region_size; u64 bar_size; - u32 offset_in_bar; + u64 offset_in_bar; u8 bar_id; u8 used; }; @@ -996,7 +1057,7 @@ struct fw_load_mgr { * hw_fini and before CS rollback. * @suspend: handles IP specific H/W or SW changes for suspend. * @resume: handles IP specific H/W or SW changes for resume. - * @cb_mmap: maps a CB. + * @mmap: maps a memory. * @ring_doorbell: increment PI on a given QMAN. * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific * function because the PQs are located in different memory areas @@ -1101,6 +1162,10 @@ struct fw_load_mgr { * generic f/w compatible PLL Indexes * @init_firmware_loader: initialize data for FW loader. * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling + * @state_dump_init: initialize constants required for state dump + * @get_sob_addr: get SOB base address offset. + * @set_pci_memory_regions: setting properties of PCI memory regions + * @get_stream_master_qid_arr: get pointer to stream masters QID array */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1110,11 +1175,11 @@ struct hl_asic_funcs { int (*sw_init)(struct hl_device *hdev); int (*sw_fini)(struct hl_device *hdev); int (*hw_init)(struct hl_device *hdev); - void (*hw_fini)(struct hl_device *hdev, bool hard_reset); - void (*halt_engines)(struct hl_device *hdev, bool hard_reset); + void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); + void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset); int (*suspend)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev); - int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, + int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size); void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, @@ -1210,10 +1275,11 @@ struct hl_asic_funcs { void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group); void (*set_dma_mask_from_fw)(struct hl_device *hdev); u64 (*get_device_time)(struct hl_device *hdev); - void (*collective_wait_init_cs)(struct hl_cs *cs); + int (*collective_wait_init_cs)(struct hl_cs *cs); int (*collective_wait_create_jobs)(struct hl_device *hdev, - struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, - u32 collective_engine_id); + struct hl_ctx *ctx, struct hl_cs *cs, + u32 wait_queue_id, u32 collective_engine_id, + u32 encaps_signal_offset); u64 (*scramble_addr)(struct hl_device *hdev, u64 addr); u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); void (*ack_protection_bits_errors)(struct hl_device *hdev); @@ -1226,6 +1292,10 @@ struct hl_asic_funcs { int (*map_pll_idx_to_fw_idx)(u32 pll_idx); void (*init_firmware_loader)(struct hl_device *hdev); void (*init_cpu_scrambler_dram)(struct hl_device *hdev); + void (*state_dump_init)(struct hl_device *hdev); + u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id); + void (*set_pci_memory_regions)(struct hl_device *hdev); + u32* (*get_stream_master_qid_arr)(void); }; @@ -1283,20 +1353,6 @@ struct hl_cs_counters_atomic { }; /** - * struct hl_pending_cb - pending command buffer structure - * @cb_node: cb node in pending cb list - * @cb: command buffer to send in next submission - * @cb_size: command buffer size - * @hw_queue_id: destination queue id - */ -struct hl_pending_cb { - struct list_head cb_node; - struct hl_cb *cb; - u32 cb_size; - u32 hw_queue_id; -}; - -/** * struct hl_ctx - user/kernel context. * @mem_hash: holds mapping from virtual address to virtual memory area * descriptor (hl_vm_phys_pg_list or hl_userptr). @@ -1312,28 +1368,21 @@ struct hl_pending_cb { * MMU hash or walking the PGT requires talking this lock. * @hw_block_list_lock: protects the HW block memory list. * @debugfs_list: node in debugfs list of contexts. - * pending_cb_list: list of pending command buffers waiting to be sent upon - * next user command submission context. * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. + * @sig_mgr: encaps signals handle manager. * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * to user so user could inquire about CS. It is used as * index to cs_pending array. * @dram_default_hops: array that holds all hops addresses needed for default * DRAM mapping. - * @pending_cb_lock: spinlock to protect pending cb list * @cs_lock: spinlock to protect cs_sequence. * @dram_phys_mem: amount of used physical DRAM memory by this context. * @thread_ctx_switch_token: token to prevent multiple threads of the same * context from running the context switch phase. * Only a single thread should run it. - * @thread_pending_cb_token: token to prevent multiple threads from processing - * the pending CB list. Only a single thread should - * process the list since it is protected by a - * spinlock and we don't want to halt the entire - * command submission sequence. * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run * the context switch phase from moving to their * execution phase before the context switch phase @@ -1353,17 +1402,15 @@ struct hl_ctx { struct mutex mmu_lock; struct mutex hw_block_list_lock; struct list_head debugfs_list; - struct list_head pending_cb_list; struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; struct gen_pool *cb_va_pool; + struct hl_encaps_signals_mgr sig_mgr; u64 cs_sequence; u64 *dram_default_hops; - spinlock_t pending_cb_lock; spinlock_t cs_lock; atomic64_t dram_phys_mem; atomic_t thread_ctx_switch_token; - atomic_t thread_pending_cb_token; u32 thread_ctx_switch_wait_token; u32 asid; u32 handle; @@ -1394,20 +1441,22 @@ struct hl_ctx_mgr { * @sgt: pointer to the scatter-gather table that holds the pages. * @dir: for DMA unmapping, the direction must be supplied, so save it. * @debugfs_list: node in debugfs list of command submissions. + * @pid: the pid of the user process owning the memory * @addr: user-space virtual address of the start of the memory area. * @size: size of the memory area to pin & map. * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ struct hl_userptr { - enum vm_type_t vm_type; /* must be first */ + enum vm_type vm_type; /* must be first */ struct list_head job_node; struct page **pages; unsigned int npages; struct sg_table *sgt; enum dma_data_direction dir; struct list_head debugfs_list; + pid_t pid; u64 addr; - u32 size; + u64 size; u8 dma_mapped; }; @@ -1426,12 +1475,14 @@ struct hl_userptr { * @mirror_node : node in device mirror list of command submissions. * @staged_cs_node: node in the staged cs list. * @debugfs_list: node in debugfs list of command submissions. + * @encaps_sig_hdl: holds the encaps signals handle. * @sequence: the sequence number of this CS. * @staged_sequence: the sequence of the staged submission this CS is part of, * relevant only if staged_cs is set. * @timeout_jiffies: cs timeout in jiffies. * @submission_time_jiffies: submission time of the cs * @type: CS_TYPE_*. + * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs. * @submitted: true if CS was submitted to H/W. * @completed: true if CS was completed by device. * @timedout : true if CS was timedout. @@ -1445,6 +1496,7 @@ struct hl_userptr { * @staged_cs: true if this CS is part of a staged submission. * @skip_reset_on_timeout: true if we shall not reset the device in case * timeout occurs (debug scenario). + * @encaps_signals: true if this CS has encaps reserved signals. */ struct hl_cs { u16 *jobs_in_queue_cnt; @@ -1459,11 +1511,13 @@ struct hl_cs { struct list_head mirror_node; struct list_head staged_cs_node; struct list_head debugfs_list; + struct hl_cs_encaps_sig_handle *encaps_sig_hdl; u64 sequence; u64 staged_sequence; u64 timeout_jiffies; u64 submission_time_jiffies; enum hl_cs_type type; + u32 encaps_sig_hdl_id; u8 submitted; u8 completed; u8 timedout; @@ -1474,6 +1528,7 @@ struct hl_cs { u8 staged_first; u8 staged_cs; u8 skip_reset_on_timeout; + u8 encaps_signals; }; /** @@ -1493,6 +1548,8 @@ struct hl_cs { * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @job_cb_size: the actual size of the CB that we put on the queue. + * @encaps_sig_wait_offset: encapsulated signals offset, which allow user + * to wait on part of the reserved signals. * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a * handle to a kernel-allocated CB object, false * otherwise (SRAM/DRAM/host address). @@ -1517,6 +1574,7 @@ struct hl_cs_job { u32 hw_queue_id; u32 user_cb_size; u32 job_cb_size; + u32 encaps_sig_wait_offset; u8 is_kernel_allocated_cb; u8 contains_dma_pkt; }; @@ -1613,7 +1671,7 @@ struct hl_vm_hw_block_list_node { * @created_from_userptr: is product of host virtual address. */ struct hl_vm_phys_pg_pack { - enum vm_type_t vm_type; /* must be first */ + enum vm_type vm_type; /* must be first */ u64 *pages; u64 npages; u64 total_size; @@ -1759,9 +1817,13 @@ struct hl_debugfs_entry { * @ctx_mem_hash_list: list of available contexts with MMU mappings. * @ctx_mem_hash_spinlock: protects cb_list. * @blob_desc: descriptor of blob + * @state_dump: data of the system states in case of a bad cs. + * @state_dump_sem: protects state_dump. * @addr: next address to read/write from/to in read/write32. * @mmu_addr: next virtual address to translate to physical address in mmu_show. + * @userptr_lookup: the target user ptr to look up for on demand. * @mmu_asid: ASID to use while translating in mmu_show. + * @state_dump_head: index of the latest state dump * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. @@ -1783,14 +1845,149 @@ struct hl_dbg_device_entry { struct list_head ctx_mem_hash_list; spinlock_t ctx_mem_hash_spinlock; struct debugfs_blob_wrapper blob_desc; + char *state_dump[HL_STATE_DUMP_HIST_LEN]; + struct rw_semaphore state_dump_sem; u64 addr; u64 mmu_addr; + u64 userptr_lookup; u32 mmu_asid; + u32 state_dump_head; u8 i2c_bus; u8 i2c_addr; u8 i2c_reg; }; +/** + * struct hl_hw_obj_name_entry - single hw object name, member of + * hl_state_dump_specs + * @node: link to the containing hash table + * @name: hw object name + * @id: object identifier + */ +struct hl_hw_obj_name_entry { + struct hlist_node node; + const char *name; + u32 id; +}; + +enum hl_state_dump_specs_props { + SP_SYNC_OBJ_BASE_ADDR, + SP_NEXT_SYNC_OBJ_ADDR, + SP_SYNC_OBJ_AMOUNT, + SP_MON_OBJ_WR_ADDR_LOW, + SP_MON_OBJ_WR_ADDR_HIGH, + SP_MON_OBJ_WR_DATA, + SP_MON_OBJ_ARM_DATA, + SP_MON_OBJ_STATUS, + SP_MONITORS_AMOUNT, + SP_TPC0_CMDQ, + SP_TPC0_CFG_SO, + SP_NEXT_TPC, + SP_MME_CMDQ, + SP_MME_CFG_SO, + SP_NEXT_MME, + SP_DMA_CMDQ, + SP_DMA_CFG_SO, + SP_DMA_QUEUES_OFFSET, + SP_NUM_OF_MME_ENGINES, + SP_SUB_MME_ENG_NUM, + SP_NUM_OF_DMA_ENGINES, + SP_NUM_OF_TPC_ENGINES, + SP_ENGINE_NUM_OF_QUEUES, + SP_ENGINE_NUM_OF_STREAMS, + SP_ENGINE_NUM_OF_FENCES, + SP_FENCE0_CNT_OFFSET, + SP_FENCE0_RDATA_OFFSET, + SP_CP_STS_OFFSET, + SP_NUM_CORES, + + SP_MAX +}; + +enum hl_sync_engine_type { + ENGINE_TPC, + ENGINE_DMA, + ENGINE_MME, +}; + +/** + * struct hl_mon_state_dump - represents a state dump of a single monitor + * @id: monitor id + * @wr_addr_low: address monitor will write to, low bits + * @wr_addr_high: address monitor will write to, high bits + * @wr_data: data monitor will write + * @arm_data: register value containing monitor configuration + * @status: monitor status + */ +struct hl_mon_state_dump { + u32 id; + u32 wr_addr_low; + u32 wr_addr_high; + u32 wr_data; + u32 arm_data; + u32 status; +}; + +/** + * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry + * @engine_type: type of the engine + * @engine_id: id of the engine + * @sync_id: id of the sync object + */ +struct hl_sync_to_engine_map_entry { + struct hlist_node node; + enum hl_sync_engine_type engine_type; + u32 engine_id; + u32 sync_id; +}; + +/** + * struct hl_sync_to_engine_map - maps sync object id to associated engine id + * @tb: hash table containing the mapping, each element is of type + * struct hl_sync_to_engine_map_entry + */ +struct hl_sync_to_engine_map { + DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS); +}; + +/** + * struct hl_state_dump_specs_funcs - virtual functions used by the state dump + * @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine + * @print_single_monitor: format monitor data as string + * @monitor_valid: return true if given monitor dump is valid + * @print_fences_single_engine: format fences data as string + */ +struct hl_state_dump_specs_funcs { + int (*gen_sync_to_engine_map)(struct hl_device *hdev, + struct hl_sync_to_engine_map *map); + int (*print_single_monitor)(char **buf, size_t *size, size_t *offset, + struct hl_device *hdev, + struct hl_mon_state_dump *mon); + int (*monitor_valid)(struct hl_mon_state_dump *mon); + int (*print_fences_single_engine)(struct hl_device *hdev, + u64 base_offset, + u64 status_base_offset, + enum hl_sync_engine_type engine_type, + u32 engine_id, char **buf, + size_t *size, size_t *offset); +}; + +/** + * struct hl_state_dump_specs - defines ASIC known hw objects names + * @so_id_to_str_tb: sync objects names index table + * @monitor_id_to_str_tb: monitors names index table + * @funcs: virtual functions used for state dump + * @sync_namager_names: readable names for sync manager if available (ex: N_E) + * @props: pointer to a per asic const props array required for state dump + */ +struct hl_state_dump_specs { + DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS); + DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS); + struct hl_state_dump_specs_funcs funcs; + const char * const *sync_namager_names; + s64 *props; +}; + /* * DEVICES @@ -1798,7 +1995,7 @@ struct hl_dbg_device_entry { #define HL_STR_MAX 32 -#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1) +#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. @@ -1946,11 +2143,13 @@ struct hwmon_chip_info; * @wq: work queue for device reset procedure. * @reset_work: reset work to be done. * @hdev: habanalabs device structure. + * @fw_reset: whether f/w will do the reset without us sending them a message to do it. */ struct hl_device_reset_work { struct workqueue_struct *wq; struct delayed_work reset_work; struct hl_device *hdev; + bool fw_reset; }; /** @@ -2065,6 +2264,58 @@ struct hl_mmu_funcs { }; /** + * number of user contexts allowed to call wait_for_multi_cs ioctl in + * parallel + */ +#define MULTI_CS_MAX_USER_CTX 2 + +/** + * struct multi_cs_completion - multi CS wait completion. + * @completion: completion of any of the CS in the list + * @lock: spinlock for the completion structure + * @timestamp: timestamp for the multi-CS completion + * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS + * is waiting + * @used: 1 if in use, otherwise 0 + */ +struct multi_cs_completion { + struct completion completion; + spinlock_t lock; + s64 timestamp; + u32 stream_master_qid_map; + u8 used; +}; + +/** + * struct multi_cs_data - internal data for multi CS call + * @ctx: pointer to the context structure + * @fence_arr: array of fences of all CSs + * @seq_arr: array of CS sequence numbers + * @timeout_us: timeout in usec for waiting for CS to complete + * @timestamp: timestamp of first completed CS + * @wait_status: wait for CS status + * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0) + * @stream_master_qid_map: bitmap of all stream master QIDs on which the + * multi-CS is waiting + * @arr_len: fence_arr and seq_arr array length + * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0) + * @update_ts: update timestamp. 1- update the timestamp, otherwise 0. + */ +struct multi_cs_data { + struct hl_ctx *ctx; + struct hl_fence **fence_arr; + u64 *seq_arr; + s64 timeout_us; + s64 timestamp; + long wait_status; + u32 completion_bitmap; + u32 stream_master_qid_map; + u8 arr_len; + u8 gone_cs; + u8 update_ts; +}; + +/** * struct hl_device - habanalabs device structure. * @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pcie_bar_phys: array of available PCIe bars physical addresses. @@ -2129,6 +2380,8 @@ struct hl_mmu_funcs { * @mmu_func: device-related MMU functions. * @fw_loader: FW loader manager. * @pci_mem_region: array of memory regions in the PCI + * @state_dump_specs: constants and dictionaries needed to dump system state. + * @multi_cs_completion: array of multi-CS completion. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2205,6 +2458,7 @@ struct hl_mmu_funcs { * halted. We can't halt it again because the COMMS * protocol will throw an error. Relevant only for * cases where Linux was not loaded to device CPU + * @supports_wait_for_multi_cs: true if wait for multi CS is supported */ struct hl_device { struct pci_dev *pdev; @@ -2273,6 +2527,11 @@ struct hl_device { struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER]; + struct hl_state_dump_specs state_dump_specs; + + struct multi_cs_completion multi_cs_completion[ + MULTI_CS_MAX_USER_CTX]; + u32 *stream_master_qid_arr; atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2322,6 +2581,8 @@ struct hl_device { u8 curr_reset_cause; u8 skip_reset_on_timeout; u8 device_cpu_is_halted; + u8 supports_wait_for_multi_cs; + u8 stream_master_qid_arr_size; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -2343,6 +2604,29 @@ struct hl_device { }; +/** + * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure + * @refcount: refcount used to protect removing this id when several + * wait cs are used to wait of the reserved encaps signals. + * @hdev: pointer to habanalabs device structure. + * @hw_sob: pointer to H/W SOB used in the reservation. + * @cs_seq: staged cs sequence which contains encapsulated signals + * @id: idr handler id to be used to fetch the handler info + * @q_idx: stream queue index + * @pre_sob_val: current SOB value before reservation + * @count: signals number + */ +struct hl_cs_encaps_sig_handle { + struct kref refcount; + struct hl_device *hdev; + struct hl_hw_sob *hw_sob; + u64 cs_seq; + u32 id; + u32 q_idx; + u32 pre_sob_val; + u32 count; +}; + /* * IOCTLs */ @@ -2373,6 +2657,23 @@ struct hl_ioctl_desc { */ /** + * hl_get_sg_info() - get number of pages and the DMA address from SG list. + * @sg: the SG list. + * @dma_addr: pointer to DMA address to return. + * + * Calculate the number of consecutive pages described by the SG list. Take the + * offset of the address in the first page, add to it the length and round it up + * to the number of needed pages. + */ +static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) +{ + *dma_addr = sg_dma_address(sg); + + return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; +} + +/** * hl_mem_area_inside_range() - Checks whether address+size are inside a range. * @address: The start address of the area we want to validate. * @size: The size in bytes of the area we want to validate. @@ -2436,7 +2737,9 @@ void destroy_hdev(struct hl_device *hdev); int hl_hw_queues_create(struct hl_device *hdev); void hl_hw_queues_destroy(struct hl_device *hdev); int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, - u32 cb_size, u64 cb_ptr); + u32 cb_size, u64 cb_ptr); +void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, + u32 ctl, u32 len, u64 ptr); int hl_hw_queue_schedule_cs(struct hl_cs *cs); u32 hl_hw_queue_add_ptr(u32 ptr, u16 val); void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id); @@ -2470,6 +2773,8 @@ void hl_ctx_do_release(struct kref *ref); void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); int hl_ctx_put(struct hl_ctx *ctx); struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); +int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr, + struct hl_fence **fence, u32 arr_len); void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); @@ -2511,18 +2816,19 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx); void hl_cb_va_pool_fini(struct hl_ctx *ctx); void hl_cs_rollback_all(struct hl_device *hdev); -void hl_pending_cb_list_flush(struct hl_ctx *ctx); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref); int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask); void hl_fence_put(struct hl_fence *fence); +void hl_fences_put(struct hl_fence **fence, int len); void hl_fence_get(struct hl_fence *fence); void cs_get(struct hl_cs *cs); bool cs_needs_completion(struct hl_cs *cs); bool cs_needs_timeout(struct hl_cs *cs); bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs); struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq); +void hl_multi_cs_completion_init(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev); @@ -2650,9 +2956,25 @@ int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +void hw_sob_get(struct hl_hw_sob *hw_sob); +void hw_sob_put(struct hl_hw_sob *hw_sob); +void hl_encaps_handle_do_release(struct kref *ref); +void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, + struct hl_cs *cs, struct hl_cs_job *job, + struct hl_cs_compl *cs_cmpl); void hl_release_pending_user_interrupts(struct hl_device *hdev); int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, - struct hl_hw_sob **hw_sob, u32 count); + struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig); + +int hl_state_dump(struct hl_device *hdev); +const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id); +const char *hl_state_dump_get_monitor_name(struct hl_device *hdev, + struct hl_mon_state_dump *mon); +void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map); +__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset, + const char *format, ...); +char *hl_format_as_binary(char *buf, size_t buf_len, u32 n); +const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type); #ifdef CONFIG_DEBUG_FS @@ -2673,6 +2995,8 @@ void hl_debugfs_remove_userptr(struct hl_device *hdev, struct hl_userptr *userptr); void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); +void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, + unsigned long length); #else @@ -2746,6 +3070,11 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, { } +static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, + char *data, unsigned long length) +{ +} + #endif /* IOCTLs */ diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 4194cda2d04c..a75e4fceb9d8 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_cb_mgr_init(&hpriv->cb_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr); - hpriv->taskpid = find_get_pid(current->pid); + hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); mutex_lock(&hdev->fpriv_list_lock); @@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp) out_err: mutex_unlock(&hdev->fpriv_list_lock); - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); filp->private_data = NULL; @@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_prop.fw_security_enabled = false; /* Assign status description string */ - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], - "disabled", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], + "operational", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], + "disabled", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], + "in device creation", HL_STR_MAX); hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; @@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) result = PCI_ERS_RESULT_NONE; } - hdev->asic_funcs->halt_engines(hdev, true); + hdev->asic_funcs->halt_engines(hdev, true, false); return result; } diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index f4dda7b4acdd..86c3257d9ae1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt; + hw_ip.server_type = prop->server_type; + return copy_to_user(out, &hw_ip, min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; } diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index bcabfdbf1e01..76b7de8f1406 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs) } /* - * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a * H/W queue. * @hdev: pointer to habanalabs device structure * @q: pointer to habanalabs queue structure @@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs) * This function must be called when the scheduler mutex is taken * */ -static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, - struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) +void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, + u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; @@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, * @cb_size: size of CB * @cb_ptr: pointer to CB location * - * This function sends a single CB, that must NOT generate a completion entry - * + * This function sends a single CB, that must NOT generate a completion entry. + * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()' */ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, u32 cb_size, u64 cb_ptr) @@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; int rc = 0; - /* - * The CPU queue is a synchronous queue with an effective depth of - * a single entry (although it is allocated with room for multiple - * entries). Therefore, there is a different lock, called - * send_cpu_message_lock, that serializes accesses to the CPU queue. - * As a result, we don't need to lock the access to the entire H/W - * queues module when submitting a JOB to the CPU queue - */ - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_lock(hdev); if (hdev->disabled) { rc = -EPERM; @@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, goto out; } - ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); out: - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_unlock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); return rc; } @@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job) cq->pi = hl_cq_inc_ptr(cq->pi); submit_bd: - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); + hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } /* @@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job) else ptr = (u64) (uintptr_t) job->user_cb; - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); + hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } static int init_signal_cs(struct hl_device *hdev, @@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev, cs_cmpl->sob_val = prop->next_sob_val; dev_dbg(hdev->dev, - "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + "generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx, + cs_cmpl->cs_seq); /* we set an EB since we must make sure all oeprations are done * when sending the signal @@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev, hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, cs_cmpl->hw_sob->sob_id, 0, true); - rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1); + rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1, + false); return rc; } -static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, +void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, + struct hl_cs *cs, struct hl_cs_job *job, + struct hl_cs_compl *cs_cmpl) +{ + struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; + + cs_cmpl->hw_sob = handle->hw_sob; + + /* Note that encaps_sig_wait_offset was validated earlier in the flow + * for offset value which exceeds the max reserved signal count. + * always decrement 1 of the offset since when the user + * set offset 1 for example he mean to wait only for the first + * signal only, which will be pre_sob_val, and if he set offset 2 + * then the value required is (pre_sob_val + 1) and so on... + */ + cs_cmpl->sob_val = handle->pre_sob_val + + (job->encaps_sig_wait_offset - 1); +} + +static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) { - struct hl_cs_compl *signal_cs_cmpl; - struct hl_sync_stream_properties *prop; struct hl_gen_wait_properties wait_prop; + struct hl_sync_stream_properties *prop; + struct hl_cs_compl *signal_cs_cmpl; u32 q_idx; q_idx = job->hw_queue_id; @@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, struct hl_cs_compl, base_fence); - /* copy the SOB id and value of the signal CS */ - cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; - cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + if (cs->encaps_signals) { + /* use the encaps signal handle stored earlier in the flow + * and set the SOB information from the encaps + * signals handle + */ + hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl); + + dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n", + cs->encaps_sig_hdl->q_idx, + cs->encaps_sig_hdl->cs_seq, + cs_cmpl->sob_val, + job->encaps_sig_wait_offset); + } else { + /* Copy the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + } + + /* check again if the signal cs already completed. + * if yes then don't send any wait cs since the hw_sob + * could be in reset already. if signal is not completed + * then get refcount to hw_sob to prevent resetting the sob + * while wait cs is not submitted. + * note that this check is protected by two locks, + * hw queue lock and completion object lock, + * and the same completion object lock also protects + * the hw_sob reset handler function. + * The hw_queue lock prevent out of sync of hw_sob + * refcount value, changed by signal/wait flows. + */ + spin_lock(&signal_cs_cmpl->lock); + + if (completion_done(&cs->signal_fence->completion)) { + spin_unlock(&signal_cs_cmpl->lock); + return -EINVAL; + } + + kref_get(&cs_cmpl->hw_sob->kref); + + spin_unlock(&signal_cs_cmpl->lock); dev_dbg(hdev->dev, - "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", + "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, - prop->base_mon_id, q_idx); + prop->base_mon_id, q_idx, cs->sequence); wait_prop.data = (void *) job->patched_cb; wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; @@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, wait_prop.mon_id = prop->base_mon_id; wait_prop.q_idx = q_idx; wait_prop.size = 0; + hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); - kref_get(&cs_cmpl->hw_sob->kref); - /* - * Must put the signal fence after the SOB refcnt increment so - * the SOB refcnt won't turn 0 and reset the SOB before the - * wait CS was submitted. - */ mb(); hl_fence_put(cs->signal_fence); cs->signal_fence = NULL; + + return 0; } /* @@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs) if (cs->type & CS_TYPE_SIGNAL) rc = init_signal_cs(hdev, job, cs_cmpl); else if (cs->type & CS_TYPE_WAIT) - init_wait_cs(hdev, cs, job, cs_cmpl); + rc = init_wait_cs(hdev, cs, job, cs_cmpl); + + return rc; +} + +static int encaps_sig_first_staged_cs_handler + (struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_cs_compl *cs_cmpl = + container_of(cs->fence, + struct hl_cs_compl, base_fence); + struct hl_cs_encaps_sig_handle *encaps_sig_hdl; + struct hl_encaps_signals_mgr *mgr; + int rc = 0; + + mgr = &hdev->compute_ctx->sig_mgr; + + spin_lock(&mgr->lock); + encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id); + if (encaps_sig_hdl) { + /* + * Set handler CS sequence, + * the CS which contains the encapsulated signals. + */ + encaps_sig_hdl->cs_seq = cs->sequence; + /* store the handle and set encaps signal indication, + * to be used later in cs_do_release to put the last + * reference to encaps signals handlers. + */ + cs_cmpl->encaps_signals = true; + cs_cmpl->encaps_sig_hdl = encaps_sig_hdl; + + /* set hw_sob pointer in completion object + * since it's used in cs_do_release flow to put + * refcount to sob + */ + cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob; + cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val + + encaps_sig_hdl->count; + + dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n", + cs->sequence, encaps_sig_hdl->id, + encaps_sig_hdl->count, + encaps_sig_hdl->q_idx, + cs_cmpl->hw_sob->sob_id, + cs_cmpl->sob_val); + + } else { + dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n", + cs->encaps_sig_hdl_id); + rc = -EINVAL; + } + + spin_unlock(&mgr->lock); return rc; } @@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) { rc = init_signal_wait_cs(cs); - if (rc) { - dev_err(hdev->dev, "Failed to submit signal cs\n"); + if (rc) goto unroll_cq_resv; - } - } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->collective_wait_init_cs(cs); + } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) { + rc = hdev->asic_funcs->collective_wait_init_cs(cs); + if (rc) + goto unroll_cq_resv; + } + if (cs->encaps_signals && cs->staged_first) { + rc = encaps_sig_first_staged_cs_handler(hdev, cs); + if (rc) + goto unroll_cq_resv; + } + spin_lock(&hdev->cs_mirror_lock); /* Verify staged CS exists and add to the staged list */ @@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node); + + /* update stream map of the first CS */ + if (hdev->supports_wait_for_multi_cs) + staged_cs->fence->stream_master_qid_map |= + cs->fence->stream_master_qid_map; } list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); @@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) hw_sob = &sync_stream_prop->hw_sob[sob]; hw_sob->hdev = hdev; hw_sob->sob_id = sync_stream_prop->base_sob_id + sob; + hw_sob->sob_addr = + hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); hw_sob->q_idx = q_idx; kref_init(&hw_sob->kref); } diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index af339ce1ab4f..33986933aa9e 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -124,7 +124,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, spin_lock(&vm->idr_lock); handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, - GFP_KERNEL); + GFP_ATOMIC); spin_unlock(&vm->idr_lock); if (handle < 0) { @@ -529,6 +529,33 @@ static inline int add_va_block(struct hl_device *hdev, } /** + * is_hint_crossing_range() - check if hint address crossing specified reserved + * range. + */ +static inline bool is_hint_crossing_range(enum hl_va_range_type range_type, + u64 start_addr, u32 size, struct asic_fixed_properties *prop) { + bool range_cross; + + if (range_type == HL_VA_RANGE_TYPE_DRAM) + range_cross = + hl_mem_area_crosses_range(start_addr, size, + prop->hints_dram_reserved_va_range.start_addr, + prop->hints_dram_reserved_va_range.end_addr); + else if (range_type == HL_VA_RANGE_TYPE_HOST) + range_cross = + hl_mem_area_crosses_range(start_addr, size, + prop->hints_host_reserved_va_range.start_addr, + prop->hints_host_reserved_va_range.end_addr); + else + range_cross = + hl_mem_area_crosses_range(start_addr, size, + prop->hints_host_hpage_reserved_va_range.start_addr, + prop->hints_host_hpage_reserved_va_range.end_addr); + + return range_cross; +} + +/** * get_va_block() - get a virtual block for the given size and alignment. * * @hdev: pointer to the habanalabs device structure. @@ -536,6 +563,8 @@ static inline int add_va_block(struct hl_device *hdev, * @size: requested block size. * @hint_addr: hint for requested address by the user. * @va_block_align: required alignment of the virtual block start address. + * @range_type: va range type (host, dram) + * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT * * This function does the following: * - Iterate on the virtual block list to find a suitable virtual block for the @@ -545,13 +574,19 @@ static inline int add_va_block(struct hl_device *hdev, */ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, - u64 size, u64 hint_addr, u32 va_block_align) + u64 size, u64 hint_addr, u32 va_block_align, + enum hl_va_range_type range_type, + u32 flags) { struct hl_vm_va_block *va_block, *new_va_block = NULL; + struct asic_fixed_properties *prop = &hdev->asic_prop; u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end, - align_mask, reserved_valid_start = 0, reserved_valid_size = 0; + align_mask, reserved_valid_start = 0, reserved_valid_size = 0, + dram_hint_mask = prop->dram_hints_align_mask; bool add_prev = false; bool is_align_pow_2 = is_power_of_2(va_range->page_size); + bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr); + bool force_hint = flags & HL_MEM_FORCE_HINT; if (is_align_pow_2) align_mask = ~((u64)va_block_align - 1); @@ -564,12 +599,20 @@ static u64 get_va_block(struct hl_device *hdev, size = DIV_ROUND_UP_ULL(size, va_range->page_size) * va_range->page_size; - tmp_hint_addr = hint_addr; + tmp_hint_addr = hint_addr & ~dram_hint_mask; /* Check if we need to ignore hint address */ if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) || - (!is_align_pow_2 && - do_div(tmp_hint_addr, va_range->page_size))) { + (!is_align_pow_2 && is_hint_dram_addr && + do_div(tmp_hint_addr, va_range->page_size))) { + + if (force_hint) { + /* Hint must be respected, so here we just fail */ + dev_err(hdev->dev, + "Hint address 0x%llx is not page aligned - cannot be respected\n", + hint_addr); + return 0; + } dev_dbg(hdev->dev, "Hint address 0x%llx will be ignored because it is not aligned\n", @@ -596,6 +639,16 @@ static u64 get_va_block(struct hl_device *hdev, if (valid_size < size) continue; + /* + * In case hint address is 0, and arc_hints_range_reservation + * property enabled, then avoid allocating va blocks from the + * range reserved for hint addresses + */ + if (prop->hints_range_reservation && !hint_addr) + if (is_hint_crossing_range(range_type, valid_start, + size, prop)) + continue; + /* Pick the minimal length block which has the required size */ if (!new_va_block || (valid_size < reserved_valid_size)) { new_va_block = va_block; @@ -618,6 +671,17 @@ static u64 get_va_block(struct hl_device *hdev, goto out; } + if (force_hint && reserved_valid_start != hint_addr) { + /* Hint address must be respected. If we are here - this means + * we could not respect it. + */ + dev_err(hdev->dev, + "Hint address 0x%llx could not be respected\n", + hint_addr); + reserved_valid_start = 0; + goto out; + } + /* * Check if there is some leftover range due to reserving the new * va block, then return it to the main virtual addresses list. @@ -670,7 +734,8 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_va_range_type type, u32 size, u32 alignment) { return get_va_block(hdev, ctx->va_range[type], size, 0, - max(alignment, ctx->va_range[type]->page_size)); + max(alignment, ctx->va_range[type]->page_size), + type, 0); } /** @@ -732,28 +797,15 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, } /** - * get_sg_info() - get number of pages and the DMA address from SG list. - * @sg: the SG list. - * @dma_addr: pointer to DMA address to return. - * - * Calculate the number of consecutive pages described by the SG list. Take the - * offset of the address in the first page, add to it the length and round it up - * to the number of needed pages. - */ -static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) -{ - *dma_addr = sg_dma_address(sg); - - return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + - (PAGE_SIZE - 1)) >> PAGE_SHIFT; -} - -/** * init_phys_pg_pack_from_userptr() - initialize physical page pack from host * memory * @ctx: pointer to the context structure. * @userptr: userptr to initialize from. * @pphys_pg_pack: result pointer. + * @force_regular_page: tell the function to ignore huge page optimization, + * even if possible. Needed for cases where the device VA + * is allocated before we know the composition of the + * physical pages * * This function does the following: * - Pin the physical pages related to the given virtual block. @@ -762,17 +814,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) */ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, struct hl_userptr *userptr, - struct hl_vm_phys_pg_pack **pphys_pg_pack) + struct hl_vm_phys_pg_pack **pphys_pg_pack, + bool force_regular_page) { + u32 npages, page_size = PAGE_SIZE, + huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); struct hl_vm_phys_pg_pack *phys_pg_pack; + bool first = true, is_huge_page_opt; + u64 page_mask, total_npages; struct scatterlist *sg; dma_addr_t dma_addr; - u64 page_mask, total_npages; - u32 npages, page_size = PAGE_SIZE, - huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; - bool first = true, is_huge_page_opt = true; int rc, i, j; - u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); if (!phys_pg_pack) @@ -783,6 +836,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->asid = ctx->asid; atomic_set(&phys_pg_pack->mapping_cnt, 1); + is_huge_page_opt = (force_regular_page ? false : true); + /* Only if all dma_addrs are aligned to 2MB and their * sizes is at least 2MB, we can use huge page mapping. * We limit the 2MB optimization to this condition, @@ -791,7 +846,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, */ total_npages = 0; for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { - npages = get_sg_info(sg, &dma_addr); + npages = hl_get_sg_info(sg, &dma_addr); total_npages += npages; @@ -820,7 +875,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, j = 0; for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { - npages = get_sg_info(sg, &dma_addr); + npages = hl_get_sg_info(sg, &dma_addr); /* align down to physical page size and save the offset */ if (first) { @@ -1001,11 +1056,12 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_userptr *userptr = NULL; struct hl_vm_hash_node *hnode; struct hl_va_range *va_range; - enum vm_type_t *vm_type; + enum vm_type *vm_type; u64 ret_vaddr, hint_addr; u32 handle = 0, va_block_align; int rc; bool is_userptr = args->flags & HL_MEM_USERPTR; + enum hl_va_range_type va_range_type = 0; /* Assume failure */ *device_addr = 0; @@ -1023,7 +1079,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, } rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); + &phys_pg_pack, false); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", @@ -1031,14 +1087,14 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto init_page_pack_err; } - vm_type = (enum vm_type_t *) userptr; + vm_type = (enum vm_type *) userptr; hint_addr = args->map_host.hint_addr; handle = phys_pg_pack->handle; /* get required alignment */ if (phys_pg_pack->page_size == page_size) { va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST]; - + va_range_type = HL_VA_RANGE_TYPE_HOST; /* * huge page alignment may be needed in case of regular * page mapping, depending on the host VA alignment @@ -1053,6 +1109,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, * mapping */ va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE]; + va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE; va_block_align = huge_page_size; } } else { @@ -1072,12 +1129,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, spin_unlock(&vm->idr_lock); - vm_type = (enum vm_type_t *) phys_pg_pack; + vm_type = (enum vm_type *) phys_pg_pack; hint_addr = args->map_device.hint_addr; /* DRAM VA alignment is the same as the MMU page size */ va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM]; + va_range_type = HL_VA_RANGE_TYPE_DRAM; va_block_align = hdev->asic_prop.dmmu.page_size; } @@ -1100,8 +1158,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto hnode_err; } + if (hint_addr && phys_pg_pack->offset) { + if (args->flags & HL_MEM_FORCE_HINT) { + /* Fail if hint must be respected but it can't be */ + dev_err(hdev->dev, + "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n", + hint_addr, phys_pg_pack->offset); + rc = -EINVAL; + goto va_block_err; + } + dev_dbg(hdev->dev, + "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n", + hint_addr, phys_pg_pack->offset); + } + ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, - hint_addr, va_block_align); + hint_addr, va_block_align, + va_range_type, args->flags); if (!ret_vaddr) { dev_err(hdev->dev, "no available va block for handle %u\n", handle); @@ -1181,17 +1254,19 @@ init_page_pack_err: static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, bool ctx_free) { - struct hl_device *hdev = ctx->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + u64 vaddr = args->unmap.device_virt_addr; struct hl_vm_hash_node *hnode = NULL; + struct asic_fixed_properties *prop; + struct hl_device *hdev = ctx->hdev; struct hl_userptr *userptr = NULL; struct hl_va_range *va_range; - u64 vaddr = args->unmap.device_virt_addr; - enum vm_type_t *vm_type; + enum vm_type *vm_type; bool is_userptr; int rc = 0; + prop = &hdev->asic_prop; + /* protect from double entrance */ mutex_lock(&ctx->mem_hash_lock); hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) @@ -1214,8 +1289,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; userptr = hnode->ptr; - rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); + + rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack, + false); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", @@ -1299,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, kfree(hnode); if (is_userptr) { - rc = free_phys_pg_pack(hdev, phys_pg_pack); + free_phys_pg_pack(hdev, phys_pg_pack); dma_unmap_host_va(hdev, userptr); } @@ -1669,6 +1745,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } + userptr->pid = current->pid; userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL); if (!userptr->sgt) return -ENOMEM; @@ -2033,7 +2110,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) * another side effect error */ if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) - dev_notice(hdev->dev, + dev_dbg(hdev->dev, "user released device without removing its memory mappings\n"); hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { diff --git a/drivers/misc/habanalabs/common/mmu/mmu_v1.c b/drivers/misc/habanalabs/common/mmu/mmu_v1.c index c5e93ff32586..0f536f79dd9c 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu/mmu_v1.c @@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); - } - /* Make sure that if we arrive here again without init was called we - * won't cause kernel panic. This can happen for example if we fail - * during hard reset code at certain points - */ - hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; + /* Make sure that if we arrive here again without init was + * called we won't cause kernel panic. This can happen for + * example if we fail during hard reset code at certain points + */ + hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; + } } /** diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index d5bedf5ba011..0b5366cc84fd 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev) goto unmap_pci_bars; } + dma_set_max_seg_size(&pdev->dev, U32_MAX); + return 0; unmap_pci_bars: diff --git a/drivers/misc/habanalabs/common/state_dump.c b/drivers/misc/habanalabs/common/state_dump.c new file mode 100644 index 000000000000..74726907c95e --- /dev/null +++ b/drivers/misc/habanalabs/common/state_dump.c @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2021 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <linux/vmalloc.h> +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" + +/** + * hl_format_as_binary - helper function, format an integer as binary + * using supplied scratch buffer + * @buf: the buffer to use + * @buf_len: buffer capacity + * @n: number to format + * + * Returns pointer to buffer + */ +char *hl_format_as_binary(char *buf, size_t buf_len, u32 n) +{ + int i; + u32 bit; + bool leading0 = true; + char *wrptr = buf; + + if (buf_len > 0 && buf_len < 3) { + *wrptr = '\0'; + return buf; + } + + wrptr[0] = '0'; + wrptr[1] = 'b'; + wrptr += 2; + /* Remove 3 characters from length for '0b' and '\0' termination */ + buf_len -= 3; + + for (i = 0; i < sizeof(n) * BITS_PER_BYTE && buf_len; ++i, n <<= 1) { + /* Writing bit calculation in one line would cause a false + * positive static code analysis error, so splitting. + */ + bit = n & (1 << (sizeof(n) * BITS_PER_BYTE - 1)); + bit = !!bit; + leading0 &= !bit; + if (!leading0) { + *wrptr = '0' + bit; + ++wrptr; + } + } + + *wrptr = '\0'; + + return buf; +} + +/** + * resize_to_fit - helper function, resize buffer to fit given amount of data + * @buf: destination buffer double pointer + * @size: pointer to the size container + * @desired_size: size the buffer must contain + * + * Returns 0 on success or error code on failure. + * On success, the size of buffer is at least desired_size. Buffer is allocated + * via vmalloc and must be freed with vfree. + */ +static int resize_to_fit(char **buf, size_t *size, size_t desired_size) +{ + char *resized_buf; + size_t new_size; + + if (*size >= desired_size) + return 0; + + /* Not enough space to print all, have to resize */ + new_size = max_t(size_t, PAGE_SIZE, round_up(desired_size, PAGE_SIZE)); + resized_buf = vmalloc(new_size); + if (!resized_buf) + return -ENOMEM; + memcpy(resized_buf, *buf, *size); + vfree(*buf); + *buf = resized_buf; + *size = new_size; + + return 1; +} + +/** + * hl_snprintf_resize() - print formatted data to buffer, resize as needed + * @buf: buffer double pointer, to be written to and resized, must be either + * NULL or allocated with vmalloc. + * @size: current size of the buffer + * @offset: current offset to write to + * @format: format of the data + * + * This function will write formatted data into the buffer. If buffer is not + * large enough, it will be resized using vmalloc. Size may be modified if the + * buffer was resized, offset will be advanced by the number of bytes written + * not including the terminating character + * + * Returns 0 on success or error code on failure + * + * Note that the buffer has to be manually released using vfree. + */ +int hl_snprintf_resize(char **buf, size_t *size, size_t *offset, + const char *format, ...) +{ + va_list args; + size_t length; + int rc; + + if (*buf == NULL && (*size != 0 || *offset != 0)) + return -EINVAL; + + va_start(args, format); + length = vsnprintf(*buf + *offset, *size - *offset, format, args); + va_end(args); + + rc = resize_to_fit(buf, size, *offset + length + 1); + if (rc < 0) + return rc; + else if (rc > 0) { + /* Resize was needed, write again */ + va_start(args, format); + length = vsnprintf(*buf + *offset, *size - *offset, format, + args); + va_end(args); + } + + *offset += length; + + return 0; +} + +/** + * hl_sync_engine_to_string - convert engine type enum to string literal + * @engine_type: engine type (TPC/MME/DMA) + * + * Return the resolved string literal + */ +const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type) +{ + switch (engine_type) { + case ENGINE_DMA: + return "DMA"; + case ENGINE_MME: + return "MME"; + case ENGINE_TPC: + return "TPC"; + } + return "Invalid Engine Type"; +} + +/** + * hl_print_resize_sync_engine - helper function, format engine name and ID + * using hl_snprintf_resize + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + * @engine_type: engine type (TPC/MME/DMA) + * @engine_id: engine numerical id + * + * Returns 0 on success or error code on failure + */ +static int hl_print_resize_sync_engine(char **buf, size_t *size, size_t *offset, + enum hl_sync_engine_type engine_type, + u32 engine_id) +{ + return hl_snprintf_resize(buf, size, offset, "%s%u", + hl_sync_engine_to_string(engine_type), engine_id); +} + +/** + * hl_state_dump_get_sync_name - transform sync object id to name if available + * @hdev: pointer to the device + * @sync_id: sync object id + * + * Returns a name literal or NULL if not resolved. + * Note: returning NULL shall not be considered as a failure, as not all + * sync objects are named. + */ +const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct hl_hw_obj_name_entry *entry; + + hash_for_each_possible(sds->so_id_to_str_tb, entry, + node, sync_id) + if (sync_id == entry->id) + return entry->name; + + return NULL; +} + +/** + * hl_state_dump_get_monitor_name - transform monitor object dump to monitor + * name if available + * @hdev: pointer to the device + * @mon: monitor state dump + * + * Returns a name literal or NULL if not resolved. + * Note: returning NULL shall not be considered as a failure, as not all + * monitors are named. + */ +const char *hl_state_dump_get_monitor_name(struct hl_device *hdev, + struct hl_mon_state_dump *mon) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct hl_hw_obj_name_entry *entry; + + hash_for_each_possible(sds->monitor_id_to_str_tb, + entry, node, mon->id) + if (mon->id == entry->id) + return entry->name; + + return NULL; +} + +/** + * hl_state_dump_free_sync_to_engine_map - free sync object to engine map + * @map: sync object to engine map + * + * Note: generic free implementation, the allocation is implemented per ASIC. + */ +void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map) +{ + struct hl_sync_to_engine_map_entry *entry; + struct hlist_node *tmp_node; + int i; + + hash_for_each_safe(map->tb, i, tmp_node, entry, node) { + hash_del(&entry->node); + kfree(entry); + } +} + +/** + * hl_state_dump_get_sync_to_engine - transform sync_id to + * hl_sync_to_engine_map_entry if available for current id + * @map: sync object to engine map + * @sync_id: sync object id + * + * Returns the translation entry if found or NULL if not. + * Note, returned NULL shall not be considered as a failure as the map + * does not cover all possible, it is a best effort sync ids. + */ +static struct hl_sync_to_engine_map_entry * +hl_state_dump_get_sync_to_engine(struct hl_sync_to_engine_map *map, u32 sync_id) +{ + struct hl_sync_to_engine_map_entry *entry; + + hash_for_each_possible(map->tb, entry, node, sync_id) + if (entry->sync_id == sync_id) + return entry; + return NULL; +} + +/** + * hl_state_dump_read_sync_objects - read sync objects array + * @hdev: pointer to the device + * @index: sync manager block index starting with E_N + * + * Returns array of size SP_SYNC_OBJ_AMOUNT on success or NULL on failure + */ +static u32 *hl_state_dump_read_sync_objects(struct hl_device *hdev, u32 index) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + u32 *sync_objects; + s64 base_addr; /* Base addr can be negative */ + int i; + + base_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] + + sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index; + + sync_objects = vmalloc(sds->props[SP_SYNC_OBJ_AMOUNT] * sizeof(u32)); + if (!sync_objects) + return NULL; + + for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) + sync_objects[i] = RREG32(base_addr + i * sizeof(u32)); + + return sync_objects; +} + +/** + * hl_state_dump_free_sync_objects - free sync objects array allocated by + * hl_state_dump_read_sync_objects + * @sync_objects: sync objects array + */ +static void hl_state_dump_free_sync_objects(u32 *sync_objects) +{ + vfree(sync_objects); +} + + +/** + * hl_state_dump_print_syncs_single_block - print active sync objects on a + * single block + * @hdev: pointer to the device + * @index: sync manager block index starting with E_N + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + * @map: sync engines names map + * + * Returns 0 on success or error code on failure + */ +static int +hl_state_dump_print_syncs_single_block(struct hl_device *hdev, u32 index, + char **buf, size_t *size, size_t *offset, + struct hl_sync_to_engine_map *map) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + const char *sync_name; + u32 *sync_objects = NULL; + int rc = 0, i; + + if (sds->sync_namager_names) { + rc = hl_snprintf_resize( + buf, size, offset, "%s\n", + sds->sync_namager_names[index]); + if (rc) + goto out; + } + + sync_objects = hl_state_dump_read_sync_objects(hdev, index); + if (!sync_objects) { + rc = -ENOMEM; + goto out; + } + + for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) { + struct hl_sync_to_engine_map_entry *entry; + u64 sync_object_addr; + + if (!sync_objects[i]) + continue; + + sync_object_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] + + sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index + + i * sizeof(u32); + + rc = hl_snprintf_resize(buf, size, offset, "sync id: %u", i); + if (rc) + goto free_sync_objects; + sync_name = hl_state_dump_get_sync_name(hdev, i); + if (sync_name) { + rc = hl_snprintf_resize(buf, size, offset, " %s", + sync_name); + if (rc) + goto free_sync_objects; + } + rc = hl_snprintf_resize(buf, size, offset, ", value: %u", + sync_objects[i]); + if (rc) + goto free_sync_objects; + + /* Append engine string */ + entry = hl_state_dump_get_sync_to_engine(map, + (u32)sync_object_addr); + if (entry) { + rc = hl_snprintf_resize(buf, size, offset, + ", Engine: "); + if (rc) + goto free_sync_objects; + rc = hl_print_resize_sync_engine(buf, size, offset, + entry->engine_type, + entry->engine_id); + if (rc) + goto free_sync_objects; + } + + rc = hl_snprintf_resize(buf, size, offset, "\n"); + if (rc) + goto free_sync_objects; + } + +free_sync_objects: + hl_state_dump_free_sync_objects(sync_objects); +out: + return rc; +} + +/** + * hl_state_dump_print_syncs - print active sync objects + * @hdev: pointer to the device + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + * + * Returns 0 on success or error code on failure + */ +static int hl_state_dump_print_syncs(struct hl_device *hdev, + char **buf, size_t *size, + size_t *offset) + +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct hl_sync_to_engine_map *map; + u32 index; + int rc = 0; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + rc = sds->funcs.gen_sync_to_engine_map(hdev, map); + if (rc) + goto free_map_mem; + + rc = hl_snprintf_resize(buf, size, offset, "Non zero sync objects:\n"); + if (rc) + goto out; + + if (sds->sync_namager_names) { + for (index = 0; sds->sync_namager_names[index]; ++index) { + rc = hl_state_dump_print_syncs_single_block( + hdev, index, buf, size, offset, map); + if (rc) + goto out; + } + } else { + for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) { + rc = hl_state_dump_print_syncs_single_block( + hdev, index, buf, size, offset, map); + if (rc) + goto out; + } + } + +out: + hl_state_dump_free_sync_to_engine_map(map); +free_map_mem: + kfree(map); + + return rc; +} + +/** + * hl_state_dump_alloc_read_sm_block_monitors - read monitors for a specific + * block + * @hdev: pointer to the device + * @index: sync manager block index starting with E_N + * + * Returns an array of monitor data of size SP_MONITORS_AMOUNT or NULL + * on error + */ +static struct hl_mon_state_dump * +hl_state_dump_alloc_read_sm_block_monitors(struct hl_device *hdev, u32 index) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct hl_mon_state_dump *monitors; + s64 base_addr; /* Base addr can be negative */ + int i; + + monitors = vmalloc(sds->props[SP_MONITORS_AMOUNT] * + sizeof(struct hl_mon_state_dump)); + if (!monitors) + return NULL; + + base_addr = sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index; + + for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) { + monitors[i].id = i; + monitors[i].wr_addr_low = + RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_LOW] + + i * sizeof(u32)); + + monitors[i].wr_addr_high = + RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_HIGH] + + i * sizeof(u32)); + + monitors[i].wr_data = + RREG32(base_addr + sds->props[SP_MON_OBJ_WR_DATA] + + i * sizeof(u32)); + + monitors[i].arm_data = + RREG32(base_addr + sds->props[SP_MON_OBJ_ARM_DATA] + + i * sizeof(u32)); + + monitors[i].status = + RREG32(base_addr + sds->props[SP_MON_OBJ_STATUS] + + i * sizeof(u32)); + } + + return monitors; +} + +/** + * hl_state_dump_free_monitors - free the monitors structure + * @monitors: monitors array created with + * hl_state_dump_alloc_read_sm_block_monitors + */ +static void hl_state_dump_free_monitors(struct hl_mon_state_dump *monitors) +{ + vfree(monitors); +} + +/** + * hl_state_dump_print_monitors_single_block - print active monitors on a + * single block + * @hdev: pointer to the device + * @index: sync manager block index starting with E_N + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + * + * Returns 0 on success or error code on failure + */ +static int hl_state_dump_print_monitors_single_block(struct hl_device *hdev, + u32 index, + char **buf, size_t *size, + size_t *offset) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct hl_mon_state_dump *monitors = NULL; + int rc = 0, i; + + if (sds->sync_namager_names) { + rc = hl_snprintf_resize( + buf, size, offset, "%s\n", + sds->sync_namager_names[index]); + if (rc) + goto out; + } + + monitors = hl_state_dump_alloc_read_sm_block_monitors(hdev, index); + if (!monitors) { + rc = -ENOMEM; + goto out; + } + + for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) { + if (!(sds->funcs.monitor_valid(&monitors[i]))) + continue; + + /* Monitor is valid, dump it */ + rc = sds->funcs.print_single_monitor(buf, size, offset, hdev, + &monitors[i]); + if (rc) + goto free_monitors; + + hl_snprintf_resize(buf, size, offset, "\n"); + } + +free_monitors: + hl_state_dump_free_monitors(monitors); +out: + return rc; +} + +/** + * hl_state_dump_print_monitors - print active monitors + * @hdev: pointer to the device + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + * + * Returns 0 on success or error code on failure + */ +static int hl_state_dump_print_monitors(struct hl_device *hdev, + char **buf, size_t *size, + size_t *offset) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + u32 index; + int rc = 0; + + rc = hl_snprintf_resize(buf, size, offset, + "Valid (armed) monitor objects:\n"); + if (rc) + goto out; + + if (sds->sync_namager_names) { + for (index = 0; sds->sync_namager_names[index]; ++index) { + rc = hl_state_dump_print_monitors_single_block( + hdev, index, buf, size, offset); + if (rc) + goto out; + } + } else { + for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) { + rc = hl_state_dump_print_monitors_single_block( + hdev, index, buf, size, offset); + if (rc) + goto out; + } + } + +out: + return rc; +} + +/** + * hl_state_dump_print_engine_fences - print active fences for a specific + * engine + * @hdev: pointer to the device + * @engine_type: engine type to use + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + */ +static int +hl_state_dump_print_engine_fences(struct hl_device *hdev, + enum hl_sync_engine_type engine_type, + char **buf, size_t *size, size_t *offset) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + int rc = 0, i, n_fences; + u64 base_addr, next_fence; + + switch (engine_type) { + case ENGINE_TPC: + n_fences = sds->props[SP_NUM_OF_TPC_ENGINES]; + base_addr = sds->props[SP_TPC0_CMDQ]; + next_fence = sds->props[SP_NEXT_TPC]; + break; + case ENGINE_MME: + n_fences = sds->props[SP_NUM_OF_MME_ENGINES]; + base_addr = sds->props[SP_MME_CMDQ]; + next_fence = sds->props[SP_NEXT_MME]; + break; + case ENGINE_DMA: + n_fences = sds->props[SP_NUM_OF_DMA_ENGINES]; + base_addr = sds->props[SP_DMA_CMDQ]; + next_fence = sds->props[SP_DMA_QUEUES_OFFSET]; + break; + default: + return -EINVAL; + } + for (i = 0; i < n_fences; ++i) { + rc = sds->funcs.print_fences_single_engine( + hdev, + base_addr + next_fence * i + + sds->props[SP_FENCE0_CNT_OFFSET], + base_addr + next_fence * i + + sds->props[SP_CP_STS_OFFSET], + engine_type, i, buf, size, offset); + if (rc) + goto out; + } +out: + return rc; +} + +/** + * hl_state_dump_print_fences - print active fences + * @hdev: pointer to the device + * @buf: destination buffer double pointer to be used with hl_snprintf_resize + * @size: pointer to the size container + * @offset: pointer to the offset container + */ +static int hl_state_dump_print_fences(struct hl_device *hdev, char **buf, + size_t *size, size_t *offset) +{ + int rc = 0; + + rc = hl_snprintf_resize(buf, size, offset, "Valid (armed) fences:\n"); + if (rc) + goto out; + + rc = hl_state_dump_print_engine_fences(hdev, ENGINE_TPC, buf, size, offset); + if (rc) + goto out; + + rc = hl_state_dump_print_engine_fences(hdev, ENGINE_MME, buf, size, offset); + if (rc) + goto out; + + rc = hl_state_dump_print_engine_fences(hdev, ENGINE_DMA, buf, size, offset); + if (rc) + goto out; + +out: + return rc; +} + +/** + * hl_state_dump() - dump system state + * @hdev: pointer to device structure + */ +int hl_state_dump(struct hl_device *hdev) +{ + char *buf = NULL; + size_t offset = 0, size = 0; + int rc; + + rc = hl_snprintf_resize(&buf, &size, &offset, + "Timestamp taken on: %llu\n\n", + ktime_to_ns(ktime_get())); + if (rc) + goto err; + + rc = hl_state_dump_print_syncs(hdev, &buf, &size, &offset); + if (rc) + goto err; + + hl_snprintf_resize(&buf, &size, &offset, "\n"); + + rc = hl_state_dump_print_monitors(hdev, &buf, &size, &offset); + if (rc) + goto err; + + hl_snprintf_resize(&buf, &size, &offset, "\n"); + + rc = hl_state_dump_print_fences(hdev, &buf, &size, &offset); + if (rc) + goto err; + + hl_snprintf_resize(&buf, &size, &offset, "\n"); + + hl_debugfs_set_state_dump(hdev, buf, size); + + return 0; +err: + vfree(buf); + return rc; +} diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index db72df282ef8..34f9f2779962 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,8 +9,7 @@ #include <linux/pci.h> -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, - bool curr) +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct cpucp_packet pkt; u32 used_pll_idx; @@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, return (long) result; } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, - u64 freq) +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { struct cpucp_packet pkt; u32 used_pll_idx; @@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); - char *str; + char str[HL_STR_MAX]; - if (atomic_read(&hdev->in_reset)) - str = "In reset"; - else if (hdev->disabled) - str = "Malfunction"; - else if (hdev->needs_reset) - str = "Needs Reset"; - else - str = "Operational"; + strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX); + + /* use uppercase for backward compatibility */ + str[0] = 'A' + (str[0] - 'a'); return sprintf(buf, "%s\n", str); } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index aa8a0ca5aca2..383865be3c2c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -76,7 +76,7 @@ #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) -#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */ @@ -106,6 +106,21 @@ #define GAUDI_PLL_MAX 10 +#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010") + +#define MONITOR_SOB_STRING_SIZE 256 + +static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = { + GAUDI_QUEUE_ID_DMA_0_0, + GAUDI_QUEUE_ID_DMA_0_1, + GAUDI_QUEUE_ID_DMA_0_2, + GAUDI_QUEUE_ID_DMA_0_3, + GAUDI_QUEUE_ID_DMA_1_0, + GAUDI_QUEUE_ID_DMA_1_1, + GAUDI_QUEUE_ID_DMA_1_2, + GAUDI_QUEUE_ID_DMA_1_3 +}; + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -348,6 +363,97 @@ static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ }; +static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { + { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" }, + { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" }, + { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" }, + { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" }, + { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" }, + { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" }, + { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" }, + { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" }, + { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" }, + { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" }, + { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" }, + { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" }, + { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" }, + { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" }, + { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" }, + { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" }, + { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" }, + { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, + { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, + { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, + { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, + { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, + { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, + { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, + { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, + { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, + { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, +}; + +static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { + { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, + { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" }, + { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, + { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, + { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, + { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, + { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, + { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, + { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, + { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, + { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, +}; + +static s64 gaudi_state_dump_specs_props[] = { + [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, + [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, + [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, + [SP_MON_OBJ_WR_ADDR_LOW] = + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, + [SP_MON_OBJ_WR_ADDR_HIGH] = + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, + [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, + [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, + [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, + [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, + [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, + [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, + [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, + [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, + [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, + [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, + [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, + [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, + [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, + [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, + [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, + [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, + [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, + [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, + [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, + [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, + [SP_FENCE0_CNT_OFFSET] = + mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, + [SP_FENCE0_RDATA_OFFSET] = + mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, + [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, + [SP_NUM_CORES] = 1, +}; + +/* The order here is opposite to the order of the indexing in the h/w. + * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. + */ +static const char * const gaudi_sync_manager_names[] = { + "SYNC_MGR_E_N", + "SYNC_MGR_W_N", + "SYNC_MGR_E_S", + "SYNC_MGR_W_S", + NULL +}; + struct ecc_info_extract_params { u64 block_address; u32 num_memories; @@ -363,8 +469,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size, u64 val); static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, u32 num_regs, u32 val); -static int gaudi_schedule_register_memset(struct hl_device *hdev, - u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val); static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id); static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); @@ -375,7 +479,6 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb); static u32 gaudi_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop); - static inline enum hl_collective_mode get_collective_mode(struct hl_device *hdev, u32 queue_id) { @@ -403,7 +506,11 @@ static inline void set_default_power_values(struct hl_device *hdev) if (hdev->card_type == cpucp_card_type_pmc) { prop->max_power_default = MAX_POWER_DEFAULT_PMC; - prop->dc_power_default = DC_POWER_DEFAULT_PMC; + + if (prop->fw_security_enabled) + prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; + else + prop->dc_power_default = DC_POWER_DEFAULT_PMC; } else { prop->max_power_default = MAX_POWER_DEFAULT_PCI; prop->dc_power_default = DC_POWER_DEFAULT_PCI; @@ -450,6 +557,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) get_collective_mode(hdev, i); } + prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->collective_first_sob = 0; prop->collective_first_mon = 0; @@ -551,6 +659,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->hard_reset_done_by_fw = false; prop->gic_interrupts_enable = true; + prop->server_type = HL_SERVER_TYPE_UNKNOWN; + return 0; } @@ -723,14 +833,14 @@ pci_init: GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); if (rc) { if (hdev->reset_on_preboot_fail) - hdev->asic_funcs->hw_fini(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_info(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true, false); } return 0; @@ -974,17 +1084,11 @@ static void gaudi_sob_group_hw_reset(struct kref *ref) struct gaudi_hw_sob_group *hw_sob_group = container_of(ref, struct gaudi_hw_sob_group, kref); struct hl_device *hdev = hw_sob_group->hdev; - u64 base_addr; - int rc; + int i; - base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + - hw_sob_group->base_sob_id * 4; - rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id, - base_addr, NUMBER_OF_SOBS_IN_GRP, 0); - if (rc) - dev_err(hdev->dev, - "failed resetting sob group - sob base %u, count %u", - hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP); + for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) + WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + + (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); kref_init(&hw_sob_group->kref); } @@ -1121,6 +1225,20 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, queue_id = job->hw_queue_id; prop = &hdev->kernel_queues[queue_id].sync_stream_prop; + if (job->cs->encaps_signals) { + /* use the encaps signal handle store earlier in the flow + * and set the SOB information from the encaps + * signals handle + */ + hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, + cs_cmpl); + + dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", + job->cs->sequence, + cs_cmpl->hw_sob->sob_id, + cs_cmpl->sob_val); + } + /* Add to wait CBs using slave monitor */ wait_prop.data = (void *) job->user_cb; wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; @@ -1131,7 +1249,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, wait_prop.size = cb_size; dev_dbg(hdev->dev, - "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n", + "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, prop->collective_slave_mon_id, queue_id); @@ -1145,7 +1263,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, prop->collective_sob_id, cb_size, false); } -static void gaudi_collective_wait_init_cs(struct hl_cs *cs) +static int gaudi_collective_wait_init_cs(struct hl_cs *cs) { struct hl_cs_compl *signal_cs_cmpl = container_of(cs->signal_fence, struct hl_cs_compl, base_fence); @@ -1163,9 +1281,37 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs) gaudi = hdev->asic_specific; cprop = &gaudi->collective_props; - /* copy the SOB id and value of the signal CS */ - cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; - cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + /* In encaps signals case the SOB info will be retrieved from + * the handle in gaudi_collective_slave_init_job. + */ + if (!cs->encaps_signals) { + /* copy the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + } + + /* check again if the signal cs already completed. + * if yes then don't send any wait cs since the hw_sob + * could be in reset already. if signal is not completed + * then get refcount to hw_sob to prevent resetting the sob + * while wait cs is not submitted. + * note that this check is protected by two locks, + * hw queue lock and completion object lock, + * and the same completion object lock also protects + * the hw_sob reset handler function. + * The hw_queue lock prevent out of sync of hw_sob + * refcount value, changed by signal/wait flows. + */ + spin_lock(&signal_cs_cmpl->lock); + + if (completion_done(&cs->signal_fence->completion)) { + spin_unlock(&signal_cs_cmpl->lock); + return -EINVAL; + } + /* Increment kref since all slave queues are now waiting on it */ + kref_get(&cs_cmpl->hw_sob->kref); + + spin_unlock(&signal_cs_cmpl->lock); /* Calculate the stream from collective master queue (1st job) */ job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); @@ -1210,21 +1356,17 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs) cprop->curr_sob_group_idx[stream], stream); } - /* Increment kref since all slave queues are now waiting on it */ - kref_get(&cs_cmpl->hw_sob->kref); - /* - * Must put the signal fence after the SOB refcnt increment so - * the SOB refcnt won't turn 0 and reset the SOB before the - * wait CS was submitted. - */ mb(); hl_fence_put(cs->signal_fence); cs->signal_fence = NULL; + + return 0; } static int gaudi_collective_wait_create_job(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_cs *cs, - enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id) + enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, + u32 encaps_signal_offset) { struct hw_queue_properties *hw_queue_prop; struct hl_cs_counters_atomic *cntr; @@ -1284,6 +1426,13 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev, job->user_cb_size = cb_size; job->hw_queue_id = queue_id; + /* since its guaranteed to have only one chunk in the collective wait + * cs, we can use this chunk to set the encapsulated signal offset + * in the jobs. + */ + if (cs->encaps_signals) + job->encaps_sig_wait_offset = encaps_signal_offset; + /* * No need in parsing, user CB is the patched CB. * We call hl_cb_destroy() out of two reasons - we don't need @@ -1312,8 +1461,9 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev, } static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, - struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, - u32 collective_engine_id) + struct hl_ctx *ctx, struct hl_cs *cs, + u32 wait_queue_id, u32 collective_engine_id, + u32 encaps_signal_offset) { struct gaudi_device *gaudi = hdev->asic_specific; struct hw_queue_properties *hw_queue_prop; @@ -1363,7 +1513,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, if (i == 0) { queue_id = wait_queue_id; rc = gaudi_collective_wait_create_job(hdev, ctx, cs, - HL_COLLECTIVE_MASTER, queue_id, wait_queue_id); + HL_COLLECTIVE_MASTER, queue_id, + wait_queue_id, encaps_signal_offset); } else { if (nic_idx < NIC_NUMBER_OF_ENGINES) { if (gaudi->hw_cap_initialized & @@ -1383,7 +1534,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, } rc = gaudi_collective_wait_create_job(hdev, ctx, cs, - HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id); + HL_COLLECTIVE_SLAVE, queue_id, + wait_queue_id, encaps_signal_offset); } if (rc) @@ -1431,6 +1583,11 @@ static int gaudi_late_init(struct hl_device *hdev) return rc; } + /* Scrub both SRAM and DRAM */ + rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0); + if (rc) + goto disable_pci_access; + rc = gaudi_fetch_psoc_frequency(hdev); if (rc) { dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); @@ -1455,6 +1612,11 @@ static int gaudi_late_init(struct hl_device *hdev) goto disable_pci_access; } + /* We only support a single ASID for the user, so for the sake of optimization, just + * initialize the ASID one time during device initialization with the fixed value of 1 + */ + gaudi_mmu_prepare(hdev, 1); + return 0; disable_pci_access: @@ -1720,8 +1882,12 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->supports_sync_stream = true; hdev->supports_coresight = true; hdev->supports_staged_submission = true; + hdev->supports_wait_for_multi_cs = true; - gaudi_set_pci_memory_regions(hdev); + hdev->asic_funcs->set_pci_memory_regions(hdev); + hdev->stream_master_qid_arr = + hdev->asic_funcs->get_stream_master_qid_arr(); + hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; return 0; @@ -2523,7 +2689,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) tpc_id < TPC_NUMBER_OF_ENGINES; tpc_id++, tpc_offset += TPC_CFG_OFFSET) { /* Mask all arithmetic interrupts from TPC */ - WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF); + WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); /* Set 16 cache lines */ WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, ICACHE_FETCH_LINE_NUM, 2); @@ -3670,7 +3836,7 @@ static void gaudi_disable_timestamp(struct hl_device *hdev) WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); } -static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) +static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) { u32 wait_timeout_ms; @@ -3682,6 +3848,9 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) else wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; + if (fw_reset) + goto skip_engines; + gaudi_stop_nic_qmans(hdev); gaudi_stop_mme_qmans(hdev); gaudi_stop_tpc_qmans(hdev); @@ -3707,6 +3876,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) gaudi_disable_timestamp(hdev); +skip_engines: gaudi_disable_msi(hdev); } @@ -3739,6 +3909,9 @@ static int gaudi_mmu_init(struct hl_device *hdev) WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8); WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40); + /* mem cache invalidation */ + WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0); WREG32(mmMMU_UP_MMU_ENABLE, 1); @@ -4071,7 +4244,7 @@ disable_queues: return rc; } -static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) +static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) { struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; @@ -4092,6 +4265,14 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; } + if (fw_reset) { + dev_info(hdev->dev, + "Firmware performs HARD reset, going to wait %dms\n", + reset_timeout_ms); + + goto skip_reset; + } + driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && !hdev->asic_prop.hard_reset_done_by_fw); @@ -4168,6 +4349,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) reset_timeout_ms); } +skip_reset: /* * After hard reset, we can't poll the BTM_FSM register because the PSOC * itself is in reset. Need to wait until the reset is deasserted @@ -4212,7 +4394,7 @@ static int gaudi_resume(struct hl_device *hdev) return gaudi_init_iatu(hdev); } -static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, +static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { int rc; @@ -4621,8 +4803,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", cur_addr, cur_addr + chunk_size); - WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0); - WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0); + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf); WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(cur_addr)); WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, @@ -5796,78 +5978,6 @@ release_cb: return rc; } -static int gaudi_schedule_register_memset(struct hl_device *hdev, - u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val) -{ - struct hl_ctx *ctx; - struct hl_pending_cb *pending_cb; - struct packet_msg_long *pkt; - u32 cb_size, ctl; - struct hl_cb *cb; - int i, rc; - - mutex_lock(&hdev->fpriv_list_lock); - ctx = hdev->compute_ctx; - - /* If no compute context available or context is going down - * memset registers directly - */ - if (!ctx || kref_read(&ctx->refcount) == 0) { - rc = gaudi_memset_registers(hdev, reg_base, num_regs, val); - mutex_unlock(&hdev->fpriv_list_lock); - return rc; - } - - mutex_unlock(&hdev->fpriv_list_lock); - - cb_size = (sizeof(*pkt) * num_regs) + - sizeof(struct packet_msg_prot) * 2; - - if (cb_size > SZ_2M) { - dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); - return -ENOMEM; - } - - pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL); - if (!pending_cb) - return -ENOMEM; - - cb = hl_cb_kernel_create(hdev, cb_size, false); - if (!cb) { - kfree(pending_cb); - return -EFAULT; - } - - pkt = cb->kernel_address; - - ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ - ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); - ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); - ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); - ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); - - for (i = 0; i < num_regs ; i++, pkt++) { - pkt->ctl = cpu_to_le32(ctl); - pkt->value = cpu_to_le32(val); - pkt->addr = cpu_to_le64(reg_base + (i * 4)); - } - - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); - - pending_cb->cb = cb; - pending_cb->cb_size = cb_size; - /* The queue ID MUST be an external queue ID. Otherwise, we will - * have undefined behavior - */ - pending_cb->hw_queue_id = hw_queue_id; - - spin_lock(&ctx->pending_cb_lock); - list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list); - spin_unlock(&ctx->pending_cb_lock); - - return 0; -} - static int gaudi_restore_sm_registers(struct hl_device *hdev) { u64 base_addr; @@ -6013,7 +6123,7 @@ static int gaudi_restore_user_registers(struct hl_device *hdev) static int gaudi_context_switch(struct hl_device *hdev, u32 asid) { - return gaudi_restore_user_registers(hdev); + return 0; } static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) @@ -6723,6 +6833,9 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) asid); } + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6772,7 +6885,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev, dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; - WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); + WREG32(mmDMA0_CORE_PROT + dma_offset, + BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, job->job_cb_size, cb->bus_address); @@ -6793,8 +6907,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev, } free_fence_ptr: - WREG32_AND(mmDMA0_CORE_PROT + dma_offset, - ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); + WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); @@ -7168,7 +7281,7 @@ static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); size = RREG32(cq_tsize); - dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", + dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", stream, cq_ptr, size); } @@ -7224,7 +7337,7 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, addr = le64_to_cpu(bd->ptr); - dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", + dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", stream, ci, addr, len); /* get previous ci, wrap if needed */ @@ -7326,24 +7439,30 @@ static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, { u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; + /* Flip the bits as the enum is ordered in the opposite way */ + index = (index ^ 0x3) & 0x3; + switch (sei_data->sei_cause) { case SM_SEI_SO_OVERFLOW: - dev_err(hdev->dev, - "SM %u SEI Error: SO %u overflow/underflow", - index, le32_to_cpu(sei_data->sei_log)); + dev_err_ratelimited(hdev->dev, + "%s SEI Error: SOB Group %u overflow/underflow", + gaudi_sync_manager_names[index], + le32_to_cpu(sei_data->sei_log)); break; case SM_SEI_LBW_4B_UNALIGNED: - dev_err(hdev->dev, - "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", - index, le32_to_cpu(sei_data->sei_log)); + dev_err_ratelimited(hdev->dev, + "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", + gaudi_sync_manager_names[index], + le32_to_cpu(sei_data->sei_log)); break; case SM_SEI_AXI_RESPONSE_ERR: - dev_err(hdev->dev, - "SM %u SEI Error: AXI ID %u response error", - index, le32_to_cpu(sei_data->sei_log)); + dev_err_ratelimited(hdev->dev, + "%s SEI Error: AXI ID %u response error", + gaudi_sync_manager_names[index], + le32_to_cpu(sei_data->sei_log)); break; default: - dev_err(hdev->dev, "Unknown SM SEI cause %u", + dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", le32_to_cpu(sei_data->sei_log)); break; } @@ -7358,6 +7477,11 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, bool extract_info_from_fw; int rc; + if (hdev->asic_prop.fw_security_enabled) { + extract_info_from_fw = true; + goto extract_ecc_info; + } + switch (event_type) { case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: @@ -7430,6 +7554,7 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, return; } +extract_ecc_info: if (extract_info_from_fw) { ecc_address = le64_to_cpu(ecc_data->ecc_address); ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); @@ -7806,8 +7931,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev, u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); - u8 cause; bool reset_required; + u8 cause; + int rc; + + if (event_type >= GAUDI_EVENT_SIZE) { + dev_err(hdev->dev, "Event type %u exceeds maximum of %u", + event_type, GAUDI_EVENT_SIZE - 1); + return; + } gaudi->events_stat[event_type]++; gaudi->events_stat_aggregate[event_type]++; @@ -7880,10 +8012,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, tpc_dec_event_to_tpc_id(event_type), "AXI_SLV_DEC_Error"); if (reset_required) { - dev_err(hdev->dev, "hard reset required due to %s\n", + dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); - goto reset_device; + hl_device_reset(hdev, 0); } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7902,10 +8034,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, tpc_krn_event_to_tpc_id(event_type), "KRN_ERR"); if (reset_required) { - dev_err(hdev->dev, "hard reset required due to %s\n", + dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); - goto reset_device; + hl_device_reset(hdev, 0); } else { hl_fw_unmask_irq(hdev, event_type); } @@ -7993,6 +8125,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, &eq_entry->sm_sei_data); + rc = hl_state_dump(hdev); + if (rc) + dev_err(hdev->dev, + "Error during system state dump %d\n", rc); hl_fw_unmask_irq(hdev, event_type); break; @@ -8031,7 +8167,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev, return; reset_device: - if (hdev->hard_reset_on_fw_events) + if (hdev->asic_prop.fw_security_enabled) + hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW); + else if (hdev->hard_reset_on_fw_events) hl_device_reset(hdev, HL_RESET_HARD); else hl_fw_unmask_irq(hdev, event_type); @@ -8563,11 +8701,20 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, static int gaudi_ctx_init(struct hl_ctx *ctx) { + int rc; + if (ctx->asid == HL_KERNEL_ASID_ID) return 0; - gaudi_mmu_prepare(ctx->hdev, ctx->asid); - return gaudi_internal_cb_pool_init(ctx->hdev, ctx); + rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); + if (rc) + return rc; + + rc = gaudi_restore_user_registers(ctx->hdev); + if (rc) + gaudi_internal_cb_pool_fini(ctx->hdev, ctx); + + return rc; } static void gaudi_ctx_fini(struct hl_ctx *ctx) @@ -8596,6 +8743,11 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) sizeof(struct packet_msg_prot) * 2; } +static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) +{ + return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); +} + static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) { @@ -8902,16 +9054,12 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev, static void gaudi_reset_sob(struct hl_device *hdev, void *data) { struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; - int rc; dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); - rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx, - CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + - hw_sob->sob_id * 4, 1, 0); - if (rc) - dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id); + WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + + hw_sob->sob_id * 4, 0); kref_init(&hw_sob->kref); } @@ -8977,6 +9125,280 @@ static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx) } } +static int gaudi_add_sync_to_engine_map_entry( + struct hl_sync_to_engine_map *map, u32 reg_value, + enum hl_sync_engine_type engine_type, u32 engine_id) +{ + struct hl_sync_to_engine_map_entry *entry; + + /* Reg value represents a partial address of sync object, + * it is used as unique identifier. For this we need to + * clear the cutoff cfg base bits from the value. + */ + if (reg_value == 0 || reg_value == 0xffffffff) + return 0; + reg_value -= (u32)CFG_BASE; + + /* create a new hash entry */ + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + entry->engine_type = engine_type; + entry->engine_id = engine_id; + entry->sync_id = reg_value; + hash_add(map->tb, &entry->node, reg_value); + + return 0; +} + +static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, + struct hl_sync_to_engine_map *map) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + struct gaudi_device *gaudi = hdev->asic_specific; + int i, j, rc; + u32 reg_value; + + /* Iterate over TPC engines */ + for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) { + /* TPC registered must be accessed with clock gating disabled */ + mutex_lock(&gaudi->clk_gate_mutex); + hdev->asic_funcs->disable_clock_gating(hdev); + + reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] + + sds->props[SP_NEXT_TPC] * i); + + /* We can reenable clock_gating */ + hdev->asic_funcs->set_clock_gating(hdev); + mutex_unlock(&gaudi->clk_gate_mutex); + + rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, + ENGINE_TPC, i); + if (rc) + goto free_sync_to_engine_map; + } + + /* Iterate over MME engines */ + for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) { + for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) { + /* MME registered must be accessed with clock gating + * disabled + */ + mutex_lock(&gaudi->clk_gate_mutex); + hdev->asic_funcs->disable_clock_gating(hdev); + + reg_value = RREG32(sds->props[SP_MME_CFG_SO] + + sds->props[SP_NEXT_MME] * i + + j * sizeof(u32)); + + /* We can reenable clock_gating */ + hdev->asic_funcs->set_clock_gating(hdev); + mutex_unlock(&gaudi->clk_gate_mutex); + + rc = gaudi_add_sync_to_engine_map_entry( + map, reg_value, ENGINE_MME, + i * sds->props[SP_SUB_MME_ENG_NUM] + j); + if (rc) + goto free_sync_to_engine_map; + } + } + + /* Iterate over DMA engines */ + for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) { + reg_value = RREG32(sds->props[SP_DMA_CFG_SO] + + sds->props[SP_DMA_QUEUES_OFFSET] * i); + rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, + ENGINE_DMA, i); + if (rc) + goto free_sync_to_engine_map; + } + + return 0; + +free_sync_to_engine_map: + hl_state_dump_free_sync_to_engine_map(map); + + return rc; +} + +static int gaudi_monitor_valid(struct hl_mon_state_dump *mon) +{ + return FIELD_GET( + SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK, + mon->status); +} + +static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon) +{ + const size_t max_write = 10; + u32 gid, mask, sob; + int i, offset; + + /* Sync object ID is calculated as follows: + * (8 * group_id + cleared bits in mask) + */ + gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK, + mon->arm_data); + mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK, + mon->arm_data); + + for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE - + max_write; mask >>= 1, i++) { + if (!(mask & 1)) { + sob = gid * MONITOR_MAX_SOBS + i; + + if (offset > 0) + offset += snprintf(sobs + offset, max_write, + ", "); + + offset += snprintf(sobs + offset, max_write, "%u", sob); + } + } +} + +static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset, + struct hl_device *hdev, + struct hl_mon_state_dump *mon) +{ + const char *name; + char scratch_buf1[BIN_REG_STRING_SIZE], + scratch_buf2[BIN_REG_STRING_SIZE]; + char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0}; + + name = hl_state_dump_get_monitor_name(hdev, mon); + if (!name) + name = ""; + + gaudi_fill_sobs_from_mon(monitored_sobs, mon); + + return hl_snprintf_resize( + buf, size, offset, + "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.", + mon->id, name, + FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK, + mon->arm_data), + hl_format_as_binary( + scratch_buf1, sizeof(scratch_buf1), + FIELD_GET( + SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK, + mon->arm_data)), + FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK, + mon->arm_data), + mon->wr_data, + (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low, + hl_format_as_binary( + scratch_buf2, sizeof(scratch_buf2), + FIELD_GET( + SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK, + mon->status)), + monitored_sobs); +} + + +static int gaudi_print_fences_single_engine( + struct hl_device *hdev, u64 base_offset, u64 status_base_offset, + enum hl_sync_engine_type engine_type, u32 engine_id, char **buf, + size_t *size, size_t *offset) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + int rc = -ENOMEM, i; + u32 *statuses, *fences; + + statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES], + sizeof(*statuses), GFP_KERNEL); + if (!statuses) + goto out; + + fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] * + sds->props[SP_ENGINE_NUM_OF_QUEUES], + sizeof(*fences), GFP_KERNEL); + if (!fences) + goto free_status; + + for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i) + statuses[i] = RREG32(status_base_offset + i * sizeof(u32)); + + for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] * + sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) + fences[i] = RREG32(base_offset + i * sizeof(u32)); + + /* The actual print */ + for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) { + u32 fence_id; + u64 fence_cnt, fence_rdata; + const char *engine_name; + + if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK, + statuses[i])) + continue; + + fence_id = + FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]); + fence_cnt = base_offset + CFG_BASE + + sizeof(u32) * + (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]); + fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] + + sds->props[SP_FENCE0_RDATA_OFFSET]; + engine_name = hl_sync_engine_to_string(engine_type); + + rc = hl_snprintf_resize( + buf, size, offset, + "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n", + engine_name, engine_id, + i, fence_id, + fence_cnt, engine_name, engine_id, fence_id, i, + fence_rdata, engine_name, engine_id, fence_id, i, + fences[fence_id], + statuses[i]); + if (rc) + goto free_fences; + } + + rc = 0; + +free_fences: + kfree(fences); +free_status: + kfree(statuses); +out: + return rc; +} + + +static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = { + .monitor_valid = gaudi_monitor_valid, + .print_single_monitor = gaudi_print_single_monitor, + .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map, + .print_fences_single_engine = gaudi_print_fences_single_engine, +}; + +static void gaudi_state_dump_init(struct hl_device *hdev) +{ + struct hl_state_dump_specs *sds = &hdev->state_dump_specs; + int i; + + for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i) + hash_add(sds->so_id_to_str_tb, + &gaudi_so_id_to_str[i].node, + gaudi_so_id_to_str[i].id); + + for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i) + hash_add(sds->monitor_id_to_str_tb, + &gaudi_monitor_id_to_str[i].node, + gaudi_monitor_id_to_str[i].id); + + sds->props = gaudi_state_dump_specs_props; + + sds->sync_namager_names = gaudi_sync_manager_names; + + sds->funcs = gaudi_state_dump_funcs; +} + +static u32 *gaudi_get_stream_master_qid_arr(void) +{ + return gaudi_stream_master; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -8989,7 +9411,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .halt_engines = gaudi_halt_engines, .suspend = gaudi_suspend, .resume = gaudi_resume, - .cb_mmap = gaudi_cb_mmap, + .mmap = gaudi_mmap, .ring_doorbell = gaudi_ring_doorbell, .pqe_write = gaudi_pqe_write, .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent, @@ -9062,7 +9484,11 @@ static const struct hl_asic_funcs gaudi_funcs = { .enable_events_from_fw = gaudi_enable_events_from_fw, .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx, .init_firmware_loader = gaudi_init_firmware_loader, - .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm + .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm, + .state_dump_init = gaudi_state_dump_init, + .get_sob_addr = gaudi_get_sob_addr, + .set_pci_memory_regions = gaudi_set_pci_memory_regions, + .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 957bf3720f70..bbbf1c343e75 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -36,6 +36,8 @@ #define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \ NUMBER_OF_CPU_HW_QUEUES) +#define GAUDI_STREAM_MASTER_ARR_SIZE 8 + #if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES) #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES" #endif @@ -50,6 +52,8 @@ #define DC_POWER_DEFAULT_PCI 60000 /* 60W */ #define DC_POWER_DEFAULT_PMC 60000 /* 60W */ +#define DC_POWER_DEFAULT_PMC_SEC 97000 /* 97W */ + #define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */ #define TPC_ENABLED_MASK 0xFF @@ -62,7 +66,7 @@ #define DMA_MAX_TRANSFER_SIZE U32_MAX -#define GAUDI_DEFAULT_CARD_NAME "HL2000" +#define GAUDI_DEFAULT_CARD_NAME "HL205" #define GAUDI_MAX_PENDING_CS SZ_16K @@ -117,6 +121,7 @@ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2) +#define MONITOR_MAX_SOBS 8 /* DRAM Memory Map */ @@ -200,6 +205,18 @@ #define HW_CAP_TPC_MASK GENMASK(31, 24) #define HW_CAP_TPC_SHIFT 24 +#define NEXT_SYNC_OBJ_ADDR_INTERVAL \ + (mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \ + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) +#define NUM_OF_MME_ENGINES 2 +#define NUM_OF_MME_SUB_ENGINES 2 +#define NUM_OF_TPC_ENGINES 8 +#define NUM_OF_DMA_ENGINES 8 +#define NUM_OF_QUEUES 5 +#define NUM_OF_STREAMS 4 +#define NUM_OF_FENCES 4 + + #define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39) #define GAUDI_PCI_TO_CPU_ADDR(addr) \ do { \ diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index c2a27ed1c4d1..5349c1be13f9 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev, return -EINVAL; } - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, - hdev->compute_ctx->asid); - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, - hdev->compute_ctx->asid); - msb = upper_32_bits(input->buffer_address) >> 8; msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK; WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb); diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c index 0d3240f1f7d7..cb265c00cf73 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_security.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c @@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2); @@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2); + mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 755e08cf2ecc..031c1849da14 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -350,6 +350,8 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E }; +static s64 goya_state_dump_specs_props[SP_MAX] = {0}; + static int goya_mmu_clear_pgt_range(struct hl_device *hdev); static int goya_mmu_set_dram_default_page(struct hl_device *hdev); static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); @@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER; } + prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->dram_base_address = DRAM_PHYS_BASE; @@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->hard_reset_done_by_fw = false; prop->gic_interrupts_enable = true; + prop->server_type = HL_SERVER_TYPE_UNKNOWN; + return 0; } @@ -649,14 +654,14 @@ pci_init: GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); if (rc) { if (hdev->reset_on_preboot_fail) - hdev->asic_funcs->hw_fini(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true, false); goto pci_fini; } if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_info(hdev->dev, "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true, false); } if (!hdev->pldm) { @@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev) hdev->supports_coresight = true; hdev->supports_soft_reset = true; hdev->allow_external_soft_reset = true; + hdev->supports_wait_for_multi_cs = false; - goya_set_pci_memory_regions(hdev); + hdev->asic_funcs->set_pci_memory_regions(hdev); return 0; @@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev) WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); } -static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) +static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) { u32 wait_timeout_ms; @@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev) struct fw_load_mgr *fw_loader = &hdev->fw_loader; /* fill common fields */ + fw_loader->linux_loaded = false; fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE; fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE; fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC; @@ -2696,14 +2703,7 @@ disable_queues: return rc; } -/* - * goya_hw_fini - Goya hardware tear-down code - * - * @hdev: pointer to hl_device structure - * @hard_reset: should we do hard reset to all engines or just reset the - * compute/dma engines - */ -static void goya_hw_fini(struct hl_device *hdev, bool hard_reset) +static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) { struct goya_device *goya = hdev->asic_specific; u32 reset_timeout_ms, cpu_timeout_ms, status; @@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev) return goya_init_iatu(hdev); } -static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, +static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { int rc; @@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) >> EQ_CTL_EVENT_TYPE_SHIFT); struct goya_device *goya = hdev->asic_specific; + if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) { + dev_err(hdev->dev, "Event type %u exceeds maximum of %u", + event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1); + return; + } + goya->events_stat[event_type]++; goya->events_stat_aggregate[event_type]++; @@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev) return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); } -static void goya_collective_wait_init_cs(struct hl_cs *cs) +static int goya_collective_wait_init_cs(struct hl_cs *cs) { - + return 0; } static int goya_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, - u32 collective_engine_id) + u32 collective_engine_id, u32 encaps_signal_offset) { return -EINVAL; } @@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx) } } +static int goya_gen_sync_to_engine_map(struct hl_device *hdev, + struct hl_sync_to_engine_map *map) +{ + /* Not implemented */ + return 0; +} + +static int goya_monitor_valid(struct hl_mon_state_dump *mon) +{ + /* Not implemented */ + return 0; +} + +static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset, + struct hl_device *hdev, + struct hl_mon_state_dump *mon) +{ + /* Not implemented */ + return 0; +} + + +static int goya_print_fences_single_engine( + struct hl_device *hdev, u64 base_offset, u64 status_base_offset, + enum hl_sync_engine_type engine_type, u32 engine_id, char **buf, + size_t *size, size_t *offset) +{ + /* Not implemented */ + return 0; +} + + +static struct hl_state_dump_specs_funcs goya_state_dump_funcs = { + .monitor_valid = goya_monitor_valid, + .print_single_monitor = goya_print_single_monitor, + .gen_sync_to_engine_map = goya_gen_sync_to_engine_map, + .print_fences_single_engine = goya_print_fences_single_engine, +}; + +static void goya_state_dump_init(struct hl_device *hdev) +{ + /* Not implemented */ + hdev->state_dump_specs.props = goya_state_dump_specs_props; + hdev->state_dump_specs.funcs = goya_state_dump_funcs; +} + +static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id) +{ + return 0; +} + +static u32 *goya_get_stream_master_qid_arr(void) +{ + return NULL; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = { .halt_engines = goya_halt_engines, .suspend = goya_suspend, .resume = goya_resume, - .cb_mmap = goya_cb_mmap, + .mmap = goya_mmap, .ring_doorbell = goya_ring_doorbell, .pqe_write = goya_pqe_write, .asic_dma_alloc_coherent = goya_dma_alloc_coherent, @@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = { .enable_events_from_fw = goya_enable_events_from_fw, .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx, .init_firmware_loader = goya_init_firmware_loader, - .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram + .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram, + .state_dump_init = goya_state_dump_init, + .get_sob_addr = &goya_get_sob_addr, + .set_pci_memory_regions = goya_set_pci_memory_regions, + .get_stream_master_qid_arr = goya_get_stream_master_qid_arr, }; /* diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 80b1d5a9d9f1..9ff6a448f0d4 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -98,6 +98,18 @@ struct hl_eq_fw_alive { __u8 pad[7]; }; +enum hl_pcie_addr_dec_cause { + PCIE_ADDR_DEC_HBW_ERR_RESP, + PCIE_ADDR_DEC_LBW_ERR_RESP, + PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR +}; + +struct hl_eq_pcie_addr_dec_data { + /* enum hl_pcie_addr_dec_cause */ + __u8 addr_dec_cause; + __u8 pad[7]; +}; + struct hl_eq_entry { struct hl_eq_header hdr; union { @@ -106,6 +118,7 @@ struct hl_eq_entry { struct hl_eq_sm_sei_data sm_sei_data; struct cpucp_pkt_sync_err pkt_sync_err; struct hl_eq_fw_alive fw_alive; + struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data; __le64 data[7]; }; }; @@ -116,7 +129,7 @@ struct hl_eq_entry { #define EQ_CTL_READY_MASK 0x80000000 #define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 +#define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 #define EQ_CTL_INDEX_SHIFT 0 #define EQ_CTL_INDEX_MASK 0x0000FFFF @@ -300,7 +313,7 @@ enum pq_init_status { * The packet's arguments specify the desired sensor and the field to * set. * - * CPUCP_PACKET_PCIE_THROUGHPUT_GET + * CPUCP_PACKET_PCIE_THROUGHPUT_GET - * Get throughput of PCIe. * The packet's arguments specify the transaction direction (TX/RX). * The window measurement is 10[msec], and the return value is in KB/sec. @@ -309,19 +322,19 @@ enum pq_init_status { * Replay count measures number of "replay" events, which is basicly * number of retries done by PCIe. * - * CPUCP_PACKET_TOTAL_ENERGY_GET + * CPUCP_PACKET_TOTAL_ENERGY_GET - * Total Energy is measurement of energy from the time FW Linux * is loaded. It is calculated by multiplying the average power * by time (passed from armcp start). The units are in MilliJouls. * - * CPUCP_PACKET_PLL_INFO_GET + * CPUCP_PACKET_PLL_INFO_GET - * Fetch frequencies of PLL from the required PLL IP. * The packet's arguments specify the device PLL type * Pll type is the PLL from device pll_index enum. * The result is composed of 4 outputs, each is 16-bit * frequency in MHz. * - * CPUCP_PACKET_POWER_GET + * CPUCP_PACKET_POWER_GET - * Fetch the present power consumption of the device (Current * Voltage). * * CPUCP_PACKET_NIC_PFC_SET - @@ -345,6 +358,24 @@ enum pq_init_status { * CPUCP_PACKET_MSI_INFO_SET - * set the index number for each supported msi type going from * host to device + * + * CPUCP_PACKET_NIC_XPCS91_REGS_GET - + * Fetch the un/correctable counters values from the NIC MAC. + * + * CPUCP_PACKET_NIC_STAT_REGS_GET - + * Fetch various NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_CLR - + * Clear the various NIC MAC counters in the NIC STAT. + * + * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET - + * Fetch all NIC MAC counters from the NIC STAT. + * + * CPUCP_PACKET_IS_IDLE_CHECK - + * Check if the device is IDLE in regard to the DMA/compute engines + * and QMANs. The f/w will return a bitmask where each bit represents + * a different engine or QMAN according to enum cpucp_idle_mask. + * The bit will be 1 if the engine is NOT idle. */ enum cpucp_packet_id { @@ -385,6 +416,11 @@ enum cpucp_packet_id { CPUCP_PACKET_NIC_LPBK_SET, /* internal */ CPUCP_PACKET_NIC_MAC_CFG, /* internal */ CPUCP_PACKET_MSI_INFO_SET, /* internal */ + CPUCP_PACKET_NIC_XPCS91_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_GET, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_CLR, /* internal */ + CPUCP_PACKET_NIC_STAT_REGS_ALL_GET, /* internal */ + CPUCP_PACKET_IS_IDLE_CHECK, /* internal */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 @@ -414,6 +450,11 @@ enum cpucp_packet_id { #define CPUCP_PKT_VAL_LPBK_IN2_SHIFT 1 #define CPUCP_PKT_VAL_LPBK_IN2_MASK 0x000000000000001Eull +#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT 0 +#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull +#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT 1 +#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull + /* heartbeat status bits */ #define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT 0 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK 0x00000001 @@ -467,7 +508,8 @@ struct cpucp_packet { __le32 status_mask; }; - __le32 reserved; + /* For NIC requests */ + __le32 port_index; }; struct cpucp_unmask_irq_arr_packet { @@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet { __le32 irqs[0]; }; +struct cpucp_nic_status_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 data[0]; +}; + struct cpucp_array_data_packet { struct cpucp_packet cpucp_pkt; __le32 length; @@ -595,6 +643,18 @@ enum pll_index { PLL_MAX }; +enum rl_index { + TPC_RL = 0, + MME_RL, +}; + +enum pvt_index { + PVT_SW, + PVT_SE, + PVT_NW, + PVT_NE +}; + /* Event Queue Packets */ struct eq_generic_event { @@ -700,6 +760,15 @@ struct cpucp_mac_addr { __u8 mac_addr[ETH_ALEN]; }; +enum cpucp_serdes_type { + TYPE_1_SERDES_TYPE, + TYPE_2_SERDES_TYPE, + HLS1_SERDES_TYPE, + HLS1H_SERDES_TYPE, + UNKNOWN_SERDES_TYPE, + MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE +}; + struct cpucp_nic_info { struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS]; __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN]; @@ -708,6 +777,40 @@ struct cpucp_nic_info { __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN]; __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN]; __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN]; + __le16 serdes_type; /* enum cpucp_serdes_type */ + __u8 reserved[6]; +}; + +/* + * struct cpucp_nic_status - describes the status of a NIC port. + * @port: NIC port index. + * @bad_format_cnt: e.g. CRC. + * @responder_out_of_sequence_psn_cnt: e.g NAK. + * @high_ber_reinit_cnt: link reinit due to high BER. + * @correctable_err_cnt: e.g. bit-flip. + * @uncorrectable_err_cnt: e.g. MAC errors. + * @retraining_cnt: re-training counter. + * @up: is port up. + * @pcs_link: has PCS link. + * @phy_ready: is PHY ready. + * @auto_neg: is Autoneg enabled. + * @timeout_retransmission_cnt: timeout retransmission events + * @high_ber_cnt: high ber events + */ +struct cpucp_nic_status { + __le32 port; + __le32 bad_format_cnt; + __le32 responder_out_of_sequence_psn_cnt; + __le32 high_ber_reinit; + __le32 correctable_err_cnt; + __le32 uncorrectable_err_cnt; + __le32 retraining_cnt; + __u8 up; + __u8 pcs_link; + __u8 phy_ready; + __u8 auto_neg; + __le32 timeout_retransmission_cnt; + __le32 high_ber_cnt; }; #endif /* CPUCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index fa8a5ad2d438..3099653234e4 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -78,6 +78,26 @@ * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support * should be contacted. * + * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD HALT ACK from ARC0 is not received + * within specified retries after issuing + * HALT request. ARC0 appears to be in bad + * reset. + * + * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD HALT ACK from ARC1 is not received + * within specified retries after issuing + * HALT request. ARC1 appears to be in bad + * reset. + * + * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD RUN ACK from ARC0 is not received + * within specified timeout after issuing + * RUN request. ARC0 appears to be in bad + * reset. + * + * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD RUN ACK from ARC1 is not received + * within specified timeout after issuing + * RUN request. ARC1 appears to be in bad + * reset. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -98,6 +118,10 @@ #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11) #define CPU_BOOT_ERR0_PLL_FAIL (1 << 12) #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13) +#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD (1 << 14) +#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD (1 << 15) +#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD (1 << 16) +#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD (1 << 17) #define CPU_BOOT_ERR0_ENABLED (1 << 31) #define CPU_BOOT_ERR1_ENABLED (1 << 31) @@ -186,6 +210,10 @@ * configured and is ready for use. * Initialized in: ppboot * + * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN NIC MAC channels init is done by FW and + * any access to them is done via the FW. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_DYN_PLL_EN Dynamic PLL configuration is enabled. * FW sends to host a bitmap of supported * PLLs. @@ -209,6 +237,21 @@ * prevent IRQs overriding each other. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN + * NIC STAT and XPCS91 access is restricted + * and is done via FW only. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN + * NIC STAT get all is supported. + * Initialized in: linux + * + * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN + * F/W checks if the device is idle by reading defined set + * of registers. It returns a bitmask of all the engines, + * where a bit is set if the engine is not idle. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -236,10 +279,14 @@ #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << 18) #define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << 20) #define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << 21) #define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << 22) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << 23) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << 24) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << 25) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) #define CPU_BOOT_DEV_STS1_ENABLED (1 << 31) @@ -313,10 +360,7 @@ struct cpu_dyn_regs { __le32 hw_state; __le32 kmd_msg_to_cpu; __le32 cpu_cmd_status_to_host; - union { - __le32 gic_host_irq_ctrl; - __le32 gic_host_pi_upd_irq; - }; + __le32 gic_host_pi_upd_irq; __le32 gic_tpc_qm_irq_ctrl; __le32 gic_mme_qm_irq_ctrl; __le32 gic_dma_qm_irq_ctrl; @@ -324,7 +368,9 @@ struct cpu_dyn_regs { __le32 gic_dma_core_irq_ctrl; __le32 gic_host_halt_irq; __le32 gic_host_ints_irq; - __le32 reserved1[24]; /* reserve for future use */ + __le32 gic_host_soft_rst_irq; + __le32 gic_rot_qm_irq_ctrl; + __le32 reserved1[22]; /* reserve for future use */ }; /* TODO: remove the desc magic after the code is updated to use message */ @@ -462,6 +508,11 @@ struct lkd_fw_comms_msg { * Do not wait for BMC response. * * COMMS_LOW_PLL_OPP Initialize PLLs for low OPP. + * + * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory + * space is allocated in a ELBI access only + * address range. + * */ enum comms_cmd { COMMS_NOOP = 0, @@ -474,6 +525,7 @@ enum comms_cmd { COMMS_GOTO_WFE = 7, COMMS_SKIP_BMC = 8, COMMS_LOW_PLL_OPP = 9, + COMMS_PREP_DESC_ELBI = 10, COMMS_INVLD_LAST }; diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h index 5bb54b34a8ae..ffdfbd9b3220 100644 --- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h +++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h @@ -126,6 +126,9 @@ #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 0x4F2004 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 0x4F3FFC #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 0x4F4000 +#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 0x4F4800 +#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0 0x4F5000 +#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0 0x4F5800 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 0x4F6000 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 0x4F67FC diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h index 9aea7e996654..acc85d3ed98b 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h @@ -449,4 +449,21 @@ enum axi_id { #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1 #define PCIE_AUX_FLR_CTRL_INT_MASK_MASK 0x2 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT 0 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK 0x1 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT 1 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK 0x1FE +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT 0 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK 0xFF +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT 8 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK 0xFF00 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT 16 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK 0x10000 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT 17 +#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK 0xFFFE0000 +#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT 20 +#define TPC0_QM_CP_STS_0_FENCE_ID_MASK 0x300000 +#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT 22 +#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK 0x400000 + #endif /* GAUDI_MASKS_H_ */ diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h index d95d4162ae2c..b9bd5a7f71eb 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h @@ -12,8 +12,6 @@ * PSOC scratch-pad registers */ #define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 -/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */ -#define mmGIC_HOST_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 #define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3 diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 95b1c6800a22..fe6fd34b8caf 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/debugfs.h> +#include <linux/utsname.h> #define DEFAULT_COUNT 10 @@ -210,6 +211,8 @@ module_param(cpoint_count, int, 0644); MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\ "crash point is to be hit to trigger action"); +/* For test debug reporting. */ +char *lkdtm_kernel_info; /* Return the crashtype number or NULL if the name is invalid */ static const struct crashtype *find_crashtype(const char *name) @@ -490,6 +493,11 @@ static int __init lkdtm_module_init(void) crash_count = cpoint_count; #endif + /* Common initialization. */ + lkdtm_kernel_info = kasprintf(GFP_KERNEL, "kernel (%s %s)", + init_uts_ns.name.release, + init_uts_ns.name.machine); + /* Handle test-specific initialization. */ lkdtm_bugs_init(&recur_count); lkdtm_perms_init(); @@ -538,6 +546,8 @@ static void __exit lkdtm_module_exit(void) if (lkdtm_kprobe != NULL) unregister_kprobe(lkdtm_kprobe); + kfree(lkdtm_kernel_info); + pr_info("Crash point unregistered\n"); } diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h index d7d64d9765eb..c212a253edde 100644 --- a/drivers/misc/lkdtm/lkdtm.h +++ b/drivers/misc/lkdtm/lkdtm.h @@ -5,17 +5,17 @@ #define pr_fmt(fmt) "lkdtm: " fmt #include <linux/kernel.h> -#include <generated/compile.h> -#include <generated/utsrelease.h> -#define LKDTM_KERNEL "kernel (" UTS_RELEASE " " UTS_MACHINE ")" +extern char *lkdtm_kernel_info; #define pr_expected_config(kconfig) \ { \ if (IS_ENABLED(kconfig)) \ - pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y\n"); \ + pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \ + lkdtm_kernel_info); \ else \ - pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y\n"); \ + pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \ + lkdtm_kernel_info); \ } #ifndef MODULE @@ -25,24 +25,30 @@ int lkdtm_check_bool_cmdline(const char *param); if (IS_ENABLED(kconfig)) { \ switch (lkdtm_check_bool_cmdline(param)) { \ case 0: \ - pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built with " #kconfig "=y but booted with '" param "=N'\n"); \ + pr_warn("This is probably expected, since this %s was built with " #kconfig "=y but booted with '" param "=N'\n", \ + lkdtm_kernel_info); \ break; \ case 1: \ - pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y and booted with '" param "=Y'\n"); \ + pr_err("Unexpected! This %s was built with " #kconfig "=y and booted with '" param "=Y'\n", \ + lkdtm_kernel_info); \ break; \ default: \ - pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y (and booted without '" param "' specified)\n"); \ + pr_err("Unexpected! This %s was built with " #kconfig "=y (and booted without '" param "' specified)\n", \ + lkdtm_kernel_info); \ } \ } else { \ switch (lkdtm_check_bool_cmdline(param)) { \ case 0: \ - pr_warn("This is probably expected, as this " LKDTM_KERNEL " was built *without* " #kconfig "=y and booted with '" param "=N'\n"); \ + pr_warn("This is probably expected, as this %s was built *without* " #kconfig "=y and booted with '" param "=N'\n", \ + lkdtm_kernel_info); \ break; \ case 1: \ - pr_err("Unexpected! This " LKDTM_KERNEL " was built *without* " #kconfig "=y but booted with '" param "=Y'\n"); \ + pr_err("Unexpected! This %s was built *without* " #kconfig "=y but booted with '" param "=Y'\n", \ + lkdtm_kernel_info); \ break; \ default: \ - pr_err("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y (and booted without '" param "' specified)\n"); \ + pr_err("This is probably expected, since this %s was built *without* " #kconfig "=y (and booted without '" param "' specified)\n", \ + lkdtm_kernel_info); \ break; \ } \ } \ diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 64d6dfa83122..267324889dd6 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1885,6 +1885,12 @@ static int gswip_gphy_fw_load(struct gswip_priv *priv, struct gswip_gphy_fw *gph reset_control_assert(gphy_fw->reset); + /* The vendor BSP uses a 200ms delay after asserting the reset line. + * Without this some users are observing that the PHY is not coming up + * on the MDIO bus. + */ + msleep(200); + ret = request_firmware(&fw, gphy_fw->fw_name, dev); if (ret) { dev_err(dev, "failed to load firmware: %s, error: %i\n", diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 1f63f50f73f1..bda5a9bf4f52 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -643,10 +643,8 @@ qca8k_mdio_busy_wait(struct mii_bus *bus, u32 reg, u32 mask) } static int -qca8k_mdio_write(struct mii_bus *salve_bus, int phy, int regnum, u16 data) +qca8k_mdio_write(struct mii_bus *bus, int phy, int regnum, u16 data) { - struct qca8k_priv *priv = salve_bus->priv; - struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -682,10 +680,8 @@ exit: } static int -qca8k_mdio_read(struct mii_bus *salve_bus, int phy, int regnum) +qca8k_mdio_read(struct mii_bus *bus, int phy, int regnum) { - struct qca8k_priv *priv = salve_bus->priv; - struct mii_bus *bus = priv->bus; u16 r1, r2, page; u32 val; int ret; @@ -727,6 +723,24 @@ exit: } static int +qca8k_internal_mdio_write(struct mii_bus *slave_bus, int phy, int regnum, u16 data) +{ + struct qca8k_priv *priv = slave_bus->priv; + struct mii_bus *bus = priv->bus; + + return qca8k_mdio_write(bus, phy, regnum, data); +} + +static int +qca8k_internal_mdio_read(struct mii_bus *slave_bus, int phy, int regnum) +{ + struct qca8k_priv *priv = slave_bus->priv; + struct mii_bus *bus = priv->bus; + + return qca8k_mdio_read(bus, phy, regnum); +} + +static int qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) { struct qca8k_priv *priv = ds->priv; @@ -775,8 +789,8 @@ qca8k_mdio_register(struct qca8k_priv *priv, struct device_node *mdio) bus->priv = (void *)priv; bus->name = "qca8k slave mii"; - bus->read = qca8k_mdio_read; - bus->write = qca8k_mdio_write; + bus->read = qca8k_internal_mdio_read; + bus->write = qca8k_internal_mdio_write; snprintf(bus->id, MII_BUS_ID_SIZE, "qca8k-%d", ds->index); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 8d90fed5d33e..6f0ea2facea9 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1050,7 +1050,7 @@ static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, #ifdef VORTEX_BUS_MASTER if (vp->bus_master) { /* Set the bus-master controller to transfer the packet. */ - outl((int) (skb->data), ioaddr + Wn7_MasterAddr); + outl(isa_virt_to_bus(skb->data), ioaddr + Wn7_MasterAddr); outw((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); vp->tx_skb = skb; outw(StartDMADown, ioaddr + EL3_CMD); diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 53660bc8d6ff..9afc712f5948 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -922,13 +922,16 @@ static void __init ne_add_devices(void) } } -#ifdef MODULE static int __init ne_init(void) { int retval; - ne_add_devices(); + + if (IS_MODULE(CONFIG_NE2000)) + ne_add_devices(); + retval = platform_driver_probe(&ne_driver, ne_drv_probe); - if (retval) { + + if (IS_MODULE(CONFIG_NE2000) && retval) { if (io[0] == 0) pr_notice("ne.c: You must supply \"io=0xNNN\"" " value(s) for ISA cards.\n"); @@ -941,18 +944,8 @@ static int __init ne_init(void) return retval; } module_init(ne_init); -#else /* MODULE */ -static int __init ne_init(void) -{ - int retval = platform_driver_probe(&ne_driver, ne_drv_probe); - - /* Unregister unused platform_devices. */ - ne_loop_rm_unreg(0); - return retval; -} -module_init(ne_init); -#ifdef CONFIG_NETDEV_LEGACY_INIT +#if !defined(MODULE) && defined(CONFIG_NETDEV_LEGACY_INIT) struct net_device * __init ne_probe(int unit) { int this_dev; @@ -994,7 +987,6 @@ struct net_device * __init ne_probe(int unit) return ERR_PTR(-ENODEV); } #endif -#endif /* MODULE */ static void __exit ne_exit(void) { diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index b5df7ad5a83f..032e8922b482 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -748,7 +748,7 @@ static void ni65_stop_start(struct net_device *dev,struct priv *p) #ifdef XMT_VIA_SKB skb_save[i] = p->tmd_skb[i]; #endif - buffer[i] = (u32) isa_bus_to_virt(tmdp->u.buffer); + buffer[i] = (unsigned long)isa_bus_to_virt(tmdp->u.buffer); blen[i] = tmdp->blen; tmdp->u.s.status = 0x0; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index f255fd0b16db..6fbf735fca31 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1224,7 +1224,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, /* SR-IOV capability was enabled but there are no VFs*/ if (iov->total == 0) { - err = -EINVAL; + err = 0; goto failed; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ea0c45d33814..037767b370d5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2213,12 +2213,11 @@ static int bnxt_async_event_process(struct bnxt *bp, DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); fw_health->tmr_counter = fw_health->tmr_multiplier; - if (!fw_health->enabled) { + if (!fw_health->enabled) fw_health->last_fw_heartbeat = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); - fw_health->last_fw_reset_cnt = - bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - } + fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); netif_info(bp, drv, bp->dev, "Error recovery info: error recovery[1], master[%d], reset count[%u], health status: 0x%x\n", fw_health->master, fw_health->last_fw_reset_cnt, @@ -2730,6 +2729,9 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; int j; + if (!txr->tx_buf_ring) + continue; + for (j = 0; j < max_idx;) { struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; struct sk_buff *skb; @@ -2814,6 +2816,9 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) } skip_rx_tpa_free: + if (!rxr->rx_buf_ring) + goto skip_rx_buf_free; + for (i = 0; i < max_idx; i++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; dma_addr_t mapping = rx_buf->mapping; @@ -2836,6 +2841,11 @@ skip_rx_tpa_free: kfree(data); } } + +skip_rx_buf_free: + if (!rxr->rx_agg_ring) + goto skip_rx_agg_free; + for (i = 0; i < max_agg_idx; i++) { struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i]; struct page *page = rx_agg_buf->page; @@ -2852,6 +2862,8 @@ skip_rx_tpa_free: __free_page(page); } + +skip_rx_agg_free: if (rxr->rx_page) { __free_page(rxr->rx_page); rxr->rx_page = NULL; @@ -2900,6 +2912,9 @@ static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) struct pci_dev *pdev = bp->pdev; int i; + if (!rmem->pg_arr) + goto skip_pages; + for (i = 0; i < rmem->nr_pages; i++) { if (!rmem->pg_arr[i]) continue; @@ -2909,6 +2924,7 @@ static void bnxt_free_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) rmem->pg_arr[i] = NULL; } +skip_pages: if (rmem->pg_tbl) { size_t pg_tbl_size = rmem->nr_pages * 8; @@ -3228,10 +3244,14 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr) { + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + kfree(cpr->cp_desc_ring); cpr->cp_desc_ring = NULL; + ring->ring_mem.pg_arr = NULL; kfree(cpr->cp_desc_mapping); cpr->cp_desc_mapping = NULL; + ring->ring_mem.dma_arr = NULL; } static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n) @@ -12207,6 +12227,11 @@ static void bnxt_fw_reset_task(struct work_struct *work) return; } + if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && + bp->fw_health->enabled) { + bp->fw_health->last_fw_reset_cnt = + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); + } bp->fw_reset_state = 0; /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 46fae1acbeed..e6a4a768b10b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1884,9 +1884,6 @@ bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev) { struct bnxt_flower_indr_block_cb_priv *cb_priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list) if (cb_priv->tunnel_netdev == netdev) return cb_priv; diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c index 8b7b59908a1a..f66d22de5168 100644 --- a/drivers/net/ethernet/cadence/macb_pci.c +++ b/drivers/net/ethernet/cadence/macb_pci.c @@ -111,9 +111,9 @@ static void macb_remove(struct pci_dev *pdev) struct platform_device *plat_dev = pci_get_drvdata(pdev); struct macb_platform_data *plat_data = dev_get_platdata(&plat_dev->dev); - platform_device_unregister(plat_dev); clk_unregister(plat_data->pclk); clk_unregister(plat_data->hclk); + platform_device_unregister(plat_dev); } static const struct pci_device_id dev_id_table[] = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 22af3d6ce178..adc54a726661 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -61,6 +61,9 @@ static unsigned int tx_sgl = 1; module_param(tx_sgl, uint, 0600); MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to optimize the IOMMU mapping"); +static bool page_pool_enabled = true; +module_param(page_pool_enabled, bool, 0400); + #define HNS3_SGL_SIZE(nfrag) (sizeof(struct scatterlist) * (nfrag) + \ sizeof(struct sg_table)) #define HNS3_MAX_SGL_SIZE ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM), \ @@ -73,6 +76,7 @@ MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to opt #define HNS3_OUTER_VLAN_TAG 2 #define HNS3_MIN_TX_LEN 33U +#define HNS3_MIN_TUN_PKT_LEN 65U /* hns3_pci_tbl - PCI Device ID Table * @@ -1424,8 +1428,11 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, l4.tcp->doff); break; case IPPROTO_UDP: - if (hns3_tunnel_csum_bug(skb)) - return skb_checksum_help(skb); + if (hns3_tunnel_csum_bug(skb)) { + int ret = skb_put_padto(skb, HNS3_MIN_TUN_PKT_LEN); + + return ret ? ret : skb_checksum_help(skb); + } hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1); hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S, @@ -4753,7 +4760,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) goto out_with_desc_cb; if (!HNAE3_IS_TX_RING(ring)) { - hns3_alloc_page_pool(ring); + if (page_pool_enabled) + hns3_alloc_page_pool(ring); ret = hns3_alloc_ring_buffers(ring); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 68ed1715ac52..87d96f82c318 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1724,6 +1724,10 @@ hclge_dbg_get_imp_stats_info(struct hclge_dev *hdev, char *buf, int len) } bd_num = le32_to_cpu(req->bd_num); + if (!bd_num) { + dev_err(&hdev->pdev->dev, "imp statistics bd number is 0!\n"); + return -EINVAL; + } desc_src = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc_src) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e55ba2e511b1..f1e46ba799f9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1528,9 +1528,10 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) static int hclge_configure(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + const struct cpumask *cpumask = cpu_online_mask; struct hclge_cfg cfg; unsigned int i; - int ret; + int node, ret; ret = hclge_get_cfg(hdev, &cfg); if (ret) @@ -1595,11 +1596,12 @@ static int hclge_configure(struct hclge_dev *hdev) hclge_init_kdump_kernel_config(hdev); - /* Set the init affinity based on pci func number */ - i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev))); - i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0; - cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)), - &hdev->affinity_mask); + /* Set the affinity based on numa node */ + node = dev_to_node(&hdev->pdev->dev); + if (node != NUMA_NO_NODE) + cpumask = cpumask_of_node(node); + + cpumask_copy(&hdev->affinity_mask, cpumask); return ret; } @@ -8125,11 +8127,12 @@ static void hclge_ae_stop(struct hnae3_handle *handle) hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock); - /* If it is not PF reset, the firmware will disable the MAC, + /* If it is not PF reset or FLR, the firmware will disable the MAC, * so it only need to stop phy here. */ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && - hdev->reset_type != HNAE3_FUNC_RESET) { + hdev->reset_type != HNAE3_FUNC_RESET && + hdev->reset_type != HNAE3_FLR_RESET) { hclge_mac_stop_phy(hdev); hclge_update_link_status(hdev); return; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 82e727020120..a69e892277b3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2465,6 +2465,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) hclgevf_enable_vector(&hdev->misc_vector, false); event_cause = hclgevf_check_evt_cause(hdev, &clearval); + if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) + hclgevf_clear_event_cause(hdev, clearval); switch (event_cause) { case HCLGEVF_VECTOR0_EVENT_RST: @@ -2477,10 +2479,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) { - hclgevf_clear_event_cause(hdev, clearval); + if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) hclgevf_enable_vector(&hdev->misc_vector, true); - } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index b8a40146b895..b482f6f633bd 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1144,7 +1144,7 @@ static struct net_device * __init i82596_probe(void) err = -ENODEV; goto out; } - memcpy(eth_addr, (void *) 0xfffc1f2c, ETH_ALEN); /* YUCK! Get addr from NOVRAM */ + memcpy(eth_addr, absolute_pointer(0xfffc1f2c), ETH_ALEN); /* YUCK! Get addr from NOVRAM */ dev->base_addr = MVME_I596_BASE; dev->irq = (unsigned) MVME16x_IRQ_I596; goto found; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a775c69e4fd7..a4579b340120 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4700,6 +4700,22 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + + if (adapter->failover_pending) { + adapter->init_done_rc = -EAGAIN; + netdev_dbg(netdev, "Failover pending, ignoring login response\n"); + complete(&adapter->init_done); + /* login response buffer will be released on reset */ + return 0; + } + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index eadcb9958346..3c4f08d20414 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -695,6 +695,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) { if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + set_bit(ICE_FLAG_AUX_ENA, pf->flags); ice_plug_aux_dev(pf); } } @@ -707,5 +708,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) { ice_unplug_aux_dev(pf); clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + clear_bit(ICE_FLAG_AUX_ENA, pf->flags); } #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 1f2afdf6cd48..adcc9a251595 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -271,6 +271,12 @@ int ice_plug_aux_dev(struct ice_pf *pf) struct auxiliary_device *adev; int ret; + /* if this PF doesn't support a technology that requires auxiliary + * devices, then gracefully exit + */ + if (!ice_is_aux_ena(pf)) + return 0; + iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); if (!iadev) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b877efae61df..0e19b4d02e62 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6350,7 +6350,9 @@ static int igc_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= netdev->features; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= netdev->vlan_features; /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e84287ffc7ce..dcf9f27ba2ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -658,11 +658,10 @@ static const struct devlink_param enable_rdma_param = static int mlx5_devlink_rdma_param_register(struct devlink *devlink) { - struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; - if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return 0; err = devlink_param_register(devlink, &enable_rdma_param); @@ -679,9 +678,7 @@ static int mlx5_devlink_rdma_param_register(struct devlink *devlink) static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink) { - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev)) + if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return; devlink_param_unpublish(devlink, &enable_rdma_param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 3f8a98093f8c..f9cf9fb31547 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1007,7 +1007,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); if (err) { mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); - return err; + goto err_cancel_work; } err = mlx5_fw_tracer_create_mkey(tracer); @@ -1031,6 +1031,7 @@ err_notifier_unregister: mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); +err_cancel_work: cancel_work_sync(&tracer->read_fw_strings_work); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 669a75f3537a..7b8c8187543a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -922,7 +922,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work); int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); -int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val, bool rx_filter); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0c38c2e319be..b5ddaa82755f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -137,7 +137,7 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr u16 vport_num, esw_owner_vhca_id; struct netlink_ext_ack *extack; int ifindex = upper->ifindex; - int err; + int err = 0; if (!netif_is_bridge_master(upper)) return 0; @@ -244,7 +244,7 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev, struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info); const struct switchdev_attr *attr = port_attr_info->attr; u16 vport_num, esw_owner_vhca_id; - int err; + int err = 0; if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num, &esw_owner_vhca_id)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 51a4d80f7fa3..de03684528bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -300,9 +300,6 @@ mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, { struct mlx5e_rep_indr_block_priv *cb_priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &rpriv->uplink_priv.tc_indr_block_priv_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index bf0313e2682b..13056cb9757d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -572,7 +572,7 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; - if (mlx5e_channels_get_ptp_rqn(chs, &rqn)) + if (!mlx5e_channels_get_ptp_rqn(chs, &rqn)) rqn = res->drop_rqn; err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2cfd12953909..306fb5d6a36d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1884,7 +1884,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) return set_pflag_cqe_based_moder(netdev, enable, true); } -int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val, bool rx_filter) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); struct mlx5e_params new_params; @@ -1896,8 +1896,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - if (new_val && !priv->profile->rx_ptp_support && - priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { + if (new_val && !priv->profile->rx_ptp_support && rx_filter) { netdev_err(priv->netdev, "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); return -EINVAL; @@ -1905,7 +1904,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_params = priv->channels.params; MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + if (rx_filter) new_params.ptp_rx = new_val; if (new_params.ptp_rx == priv->channels.params.ptp_rx) @@ -1928,12 +1927,14 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool rx_filter; int err; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); + rx_filter = priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE; + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable, rx_filter); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47efd858964d..3fd515e7bf30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3554,14 +3554,14 @@ static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filte if (!rx_filter) /* Reset CQE compression to Admin default */ - return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def); + return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false); if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) return 0; /* Disable CQE compression */ netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true); if (err) netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9fe8e3c204d6..fe501ba88bea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1682,14 +1682,13 @@ static int build_match_list(struct match_list *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { + rcu_read_unlock(); free_match_list(match_head, ft_locked); - err = -ENOMEM; - goto out; + return -ENOMEM; } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } -out: rcu_read_unlock(); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 49ca57c6d31d..ca5690b0a7ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -927,9 +927,12 @@ void mlx5_lag_disable_change(struct mlx5_core_dev *dev) struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + mlx5_dev_list_lock(); - ldev = mlx5_lag_dev(dev); dev0 = ldev->pf[MLX5_LAG_P1].dev; dev1 = ldev->pf[MLX5_LAG_P2].dev; @@ -946,8 +949,11 @@ void mlx5_lag_enable_change(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; - mlx5_dev_list_lock(); ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); ldev->mode_changes_in_progress--; mlx5_dev_list_unlock(); mlx5_queue_bond_work(ldev, 0); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index 3e85b17f5857..6704f5c1aa32 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -142,6 +142,13 @@ static int mlxbf_gige_open(struct net_device *netdev) err = mlxbf_gige_clean_port(priv); if (err) goto free_irqs; + + /* Clear driver's valid_polarity to match hardware, + * since the above call to clean_port() resets the + * receive polarity used by hardware. + */ + priv->valid_polarity = 0; + err = mlxbf_gige_rx_init(priv); if (err) goto free_irqs; diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index c1310ea1c216..d5c485a6d284 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -398,9 +398,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, int err; u16 i; - dma_buf = kzalloc(sizeof(*dma_buf) + - q_depth * sizeof(struct hwc_work_request), - GFP_KERNEL); + dma_buf = kzalloc(struct_size(dma_buf, reqs, q_depth), GFP_KERNEL); if (!dma_buf) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 556c3495211d..64c0ef57ad42 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1767,9 +1767,6 @@ nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app, struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; - /* All callback list access should be protected by RTNL. */ - ASSERT_RTNL(); - list_for_each_entry(cb_priv, &priv->indr_block_cb_priv, list) if (cb_priv->netdev == netdev) return cb_priv; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 6e5a6cc97d0e..24cd41567775 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3367,6 +3367,7 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, struct qed_nvm_image_att *p_image_att) { enum nvm_image_type type; + int rc; u32 i; /* Translate image_id into MFW definitions */ @@ -3395,7 +3396,10 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn, return -EINVAL; } - qed_mcp_nvm_info_populate(p_hwfn); + rc = qed_mcp_nvm_info_populate(p_hwfn); + if (rc) + return rc; + for (i = 0; i < p_hwfn->nvm_info.num_images; i++) if (type == p_hwfn->nvm_info.image_att[i].image_type) break; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 0a2f34fc8b24..27dffa299ca6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1354,10 +1354,10 @@ static int qlcnic_83xx_copy_fw_file(struct qlcnic_adapter *adapter) struct qlc_83xx_fw_info *fw_info = adapter->ahw->fw_info; const struct firmware *fw = fw_info->fw; u32 dest, *p_cache, *temp; - int i, ret = -EIO; __le32 *temp_le; u8 data[16]; size_t size; + int i, ret; u64 addr; temp = vzalloc(fw->size); diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 4b2eca5e08e2..01ef5efd7bc2 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -119,6 +119,8 @@ #define PHY_ST 0x8A /* PHY status register */ #define MAC_SM 0xAC /* MAC status machine */ #define MAC_SM_RST 0x0002 /* MAC status machine reset */ +#define MD_CSC 0xb6 /* MDC speed control register */ +#define MD_CSC_DEFAULT 0x0030 #define MAC_ID 0xBE /* Identifier register */ #define TX_DCNT 0x80 /* TX descriptor count */ @@ -355,8 +357,9 @@ static void r6040_reset_mac(struct r6040_private *lp) { void __iomem *ioaddr = lp->base; int limit = MAC_DEF_TIMEOUT; - u16 cmd; + u16 cmd, md_csc; + md_csc = ioread16(ioaddr + MD_CSC); iowrite16(MAC_RST, ioaddr + MCR1); while (limit--) { cmd = ioread16(ioaddr + MCR1); @@ -368,6 +371,10 @@ static void r6040_reset_mac(struct r6040_private *lp) iowrite16(MAC_SM_RST, ioaddr + MAC_SM); iowrite16(0, ioaddr + MAC_SM); mdelay(5); + + /* Restore MDIO clock frequency */ + if (md_csc != MD_CSC_DEFAULT) + iowrite16(md_csc, ioaddr + MD_CSC); } static void r6040_init_mac_regs(struct net_device *dev) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index e5b0d795c301..3dbea028b325 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -166,32 +166,46 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * We need a channel per event queue, plus a VI per tx queue. * This may be more pessimistic than it needs to be. */ - if (n_channels + n_xdp_ev > max_channels) { - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", - n_xdp_ev, n_channels, max_channels); - netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface"); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; + if (n_channels >= max_channels) { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT might decrease device's performance\n"); } else if (n_channels + n_xdp_tx > efx->max_vis) { - netif_err(efx, drv, efx->net_dev, - "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", - n_xdp_tx, n_channels, efx->max_vis); - netif_err(efx, drv, efx->net_dev, - "XDP_TX and XDP_REDIRECT will not work on this interface"); - efx->n_xdp_channels = 0; - efx->xdp_tx_per_channel = 0; - efx->xdp_tx_queue_count = 0; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n", + n_xdp_tx, n_channels, efx->max_vis); + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT might decrease device's performance\n"); + } else if (n_channels + n_xdp_ev > max_channels) { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_SHARED; + netif_warn(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + + n_xdp_ev = max_channels - n_channels; + netif_warn(efx, drv, efx->net_dev, + "XDP_TX and XDP_REDIRECT will work with reduced performance (%d cpus/tx_queue)\n", + DIV_ROUND_UP(n_xdp_tx, tx_per_ev * n_xdp_ev)); } else { + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_DEDICATED; + } + + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_BORROWED) { efx->n_xdp_channels = n_xdp_ev; efx->xdp_tx_per_channel = tx_per_ev; efx->xdp_tx_queue_count = n_xdp_tx; n_channels += n_xdp_ev; netif_dbg(efx, drv, efx->net_dev, "Allocating %d TX and %d event queues for XDP\n", - n_xdp_tx, n_xdp_ev); + n_xdp_ev * tx_per_ev, n_xdp_ev); + } else { + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = n_xdp_tx; } if (vec_count < n_channels) { @@ -858,6 +872,20 @@ rollback: goto out; } +static inline int +efx_set_xdp_tx_queue(struct efx_nic *efx, int xdp_queue_number, + struct efx_tx_queue *tx_queue) +{ + if (xdp_queue_number >= efx->xdp_tx_queue_count) + return -EINVAL; + + netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", + tx_queue->channel->channel, tx_queue->label, + xdp_queue_number, tx_queue->queue); + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + return 0; +} + int efx_set_channels(struct efx_nic *efx) { struct efx_tx_queue *tx_queue; @@ -896,20 +924,9 @@ int efx_set_channels(struct efx_nic *efx) if (efx_channel_is_xdp_tx(channel)) { efx_for_each_channel_tx_queue(tx_queue, channel) { tx_queue->queue = next_queue++; - - /* We may have a few left-over XDP TX - * queues owing to xdp_tx_queue_count - * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL. - * We still allocate and probe those - * TXQs, but never use them. - */ - if (xdp_queue_number < efx->xdp_tx_queue_count) { - netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n", - channel->channel, tx_queue->label, - xdp_queue_number, tx_queue->queue); - efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) xdp_queue_number++; - } } } else { efx_for_each_channel_tx_queue(tx_queue, channel) { @@ -918,10 +935,35 @@ int efx_set_channels(struct efx_nic *efx) channel->channel, tx_queue->label, tx_queue->queue); } + + /* If XDP is borrowing queues from net stack, it must use the queue + * with no csum offload, which is the first one of the channel + * (note: channel->tx_queue_by_type is not initialized yet) + */ + if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { + tx_queue = &channel->tx_queue[0]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; + } } } } - WARN_ON(xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number != efx->xdp_tx_queue_count); + WARN_ON(efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED && + xdp_queue_number > efx->xdp_tx_queue_count); + + /* If we have more CPUs than assigned XDP TX queues, assign the already + * existing queues to the exceeding CPUs + */ + next_queue = 0; + while (xdp_queue_number < efx->xdp_tx_queue_count) { + tx_queue = efx->xdp_tx_queues[next_queue++]; + rc = efx_set_xdp_tx_queue(efx, xdp_queue_number, tx_queue); + if (rc == 0) + xdp_queue_number++; + } rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9b4b25704271..f6981810039d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -782,6 +782,12 @@ struct efx_async_filter_insertion { #define EFX_RPS_MAX_IN_FLIGHT 8 #endif /* CONFIG_RFS_ACCEL */ +enum efx_xdp_tx_queues_mode { + EFX_XDP_TX_QUEUES_DEDICATED, /* one queue per core, locking not needed */ + EFX_XDP_TX_QUEUES_SHARED, /* each queue used by more than 1 core */ + EFX_XDP_TX_QUEUES_BORROWED /* queues borrowed from net stack */ +}; + /** * struct efx_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) @@ -820,6 +826,7 @@ struct efx_async_filter_insertion { * should be allocated for this NIC * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues. * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit. + * @xdp_txq_queues_mode: XDP TX queues sharing strategy. * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. * @txq_stop_thresh: TX queue fill level at or above which we stop it. @@ -979,6 +986,7 @@ struct efx_nic { unsigned int xdp_tx_queue_count; struct efx_tx_queue **xdp_tx_queues; + enum efx_xdp_tx_queues_mode xdp_txq_queues_mode; unsigned rxq_entries; unsigned txq_entries; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 0c6650d2e239..d16e031e95f4 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -428,23 +428,32 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, unsigned int len; int space; int cpu; - int i; + int i = 0; - cpu = raw_smp_processor_id(); + if (unlikely(n && !xdpfs)) + return -EINVAL; + if (unlikely(!n)) + return 0; - if (!efx->xdp_tx_queue_count || - unlikely(cpu >= efx->xdp_tx_queue_count)) + cpu = raw_smp_processor_id(); + if (unlikely(cpu >= efx->xdp_tx_queue_count)) return -EINVAL; tx_queue = efx->xdp_tx_queues[cpu]; if (unlikely(!tx_queue)) return -EINVAL; - if (unlikely(n && !xdpfs)) - return -EINVAL; + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_LOCK(efx->net_dev, tx_queue->core_txq, cpu); - if (!n) - return 0; + /* If we're borrowing net stack queues we have to handle stop-restart + * or we might block the queue and it will be considered as frozen + */ + if (efx->xdp_txq_queues_mode == EFX_XDP_TX_QUEUES_BORROWED) { + if (netif_tx_queue_stopped(tx_queue->core_txq)) + goto unlock; + efx_tx_maybe_stop_queue(tx_queue); + } /* Check for available space. We should never need multiple * descriptors per frame. @@ -484,6 +493,10 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (flush && i > 0) efx_nic_push_buffers(tx_queue); +unlock: + if (efx->xdp_txq_queues_mode != EFX_XDP_TX_QUEUES_DEDICATED) + HARD_TX_UNLOCK(efx->net_dev, tx_queue->core_txq); + return i == 0 ? -EIO : i; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ece02b35a6ce..553c4403258a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -309,7 +309,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } @@ -7118,7 +7118,6 @@ int stmmac_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); u32 chan; - int ret; if (!ndev || !netif_running(ndev)) return 0; @@ -7150,13 +7149,6 @@ int stmmac_suspend(struct device *dev) } else { stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); - /* Disable clock in case of PWM is off */ - clk_disable_unprepare(priv->plat->clk_ptp_ref); - ret = pm_runtime_force_suspend(dev); - if (ret) { - mutex_unlock(&priv->lock); - return ret; - } } mutex_unlock(&priv->lock); @@ -7242,12 +7234,6 @@ int stmmac_resume(struct device *dev) priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); - /* enable the clk previously disabled */ - ret = pm_runtime_force_resume(dev); - if (ret) - return ret; - if (priv->plat->clk_ptp_ref) - clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5ca710844cc1..62cec9bfcd33 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -9,6 +9,7 @@ *******************************************************************************/ #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/module.h> #include <linux/io.h> #include <linux/of.h> @@ -771,9 +772,52 @@ static int __maybe_unused stmmac_runtime_resume(struct device *dev) return stmmac_bus_clks_config(priv, true); } +static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* Disable clock in case of PWM is off */ + clk_disable_unprepare(priv->plat->clk_ptp_ref); + + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + } + + return 0; +} + +static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* enable the clk previously disabled */ + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + + clk_prepare_enable(priv->plat->clk_ptp_ref); + } + + return 0; +} + const struct dev_pm_ops stmmac_pltfr_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume) }; EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 8fe8887d506a..6192244b304a 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -68,9 +68,9 @@ #define SIXP_DAMA_OFF 0 /* default level 2 parameters */ -#define SIXP_TXDELAY (HZ/4) /* in 1 s */ +#define SIXP_TXDELAY 25 /* 250 ms */ #define SIXP_PERSIST 50 /* in 256ths */ -#define SIXP_SLOTTIME (HZ/10) /* in 1 s */ +#define SIXP_SLOTTIME 10 /* 100 ms */ #define SIXP_INIT_RESYNC_TIMEOUT (3*HZ/2) /* in 1 s */ #define SIXP_RESYNC_TIMEOUT 5*HZ /* in 1 s */ diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index b50b7fafd8d6..f4c3efc3e074 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -973,7 +973,7 @@ static inline void tx_on(struct scc_priv *priv) flags = claim_dma_lock(); set_dma_mode(priv->param.dma, DMA_MODE_WRITE); set_dma_addr(priv->param.dma, - (int) priv->tx_buf[priv->tx_tail] + n); + virt_to_bus(priv->tx_buf[priv->tx_tail]) + n); set_dma_count(priv->param.dma, priv->tx_len[priv->tx_tail] - n); release_dma_lock(flags); @@ -1020,7 +1020,7 @@ static inline void rx_on(struct scc_priv *priv) flags = claim_dma_lock(); set_dma_mode(priv->param.dma, DMA_MODE_READ); set_dma_addr(priv->param.dma, - (int) priv->rx_buf[priv->rx_head]); + virt_to_bus(priv->rx_buf[priv->rx_head])); set_dma_count(priv->param.dma, BUF_SIZE); release_dma_lock(flags); enable_dma(priv->param.dma); @@ -1233,7 +1233,7 @@ static void special_condition(struct scc_priv *priv, int rc) if (priv->param.dma >= 0) { flags = claim_dma_lock(); set_dma_addr(priv->param.dma, - (int) priv->rx_buf[priv->rx_head]); + virt_to_bus(priv->rx_buf[priv->rx_head])); set_dma_count(priv->param.dma, BUF_SIZE); release_dma_lock(flags); } else { diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c index 2324e1b93e37..1da334f54944 100644 --- a/drivers/net/ipa/ipa_table.c +++ b/drivers/net/ipa/ipa_table.c @@ -430,7 +430,8 @@ static void ipa_table_init_add(struct gsi_trans *trans, bool filter, * table region determines the number of entries it has. */ if (filter) { - count = hweight32(ipa->filter_map); + /* Include one extra "slot" to hold the filter map itself */ + count = 1 + hweight32(ipa->filter_map); hash_count = hash_mem->size ? count : 0; } else { count = mem->size / sizeof(__le64); diff --git a/drivers/net/phy/dp83640_reg.h b/drivers/net/phy/dp83640_reg.h index 21aa24c741b9..daae7fa58fb8 100644 --- a/drivers/net/phy/dp83640_reg.h +++ b/drivers/net/phy/dp83640_reg.h @@ -5,7 +5,7 @@ #ifndef HAVE_DP83640_REGISTERS #define HAVE_DP83640_REGISTERS -#define PAGE0 0x0000 +/* #define PAGE0 0x0000 */ #define PHYCR2 0x001c /* PHY Control Register 2 */ #define PAGE4 0x0004 diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 9e2891d8e8dd..ba5ad86ec826 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -233,9 +233,11 @@ static DEFINE_MUTEX(phy_fixup_lock); static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { + struct device_driver *drv = phydev->mdio.dev.driver; + struct phy_driver *phydrv = to_phy_driver(drv); struct net_device *netdev = phydev->attached_dev; - if (!phydev->drv->suspend) + if (!drv || !phydrv->suspend) return false; /* PHY not attached? May suspend if the PHY has not already been diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a1464b764d4d..0a0abe8e4be0 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1607,6 +1607,32 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, if (config.an_enabled && phylink_is_empty_linkmode(config.advertising)) return -EINVAL; + /* If this link is with an SFP, ensure that changes to advertised modes + * also cause the associated interface to be selected such that the + * link can be configured correctly. + */ + if (pl->sfp_port && pl->sfp_bus) { + config.interface = sfp_select_interface(pl->sfp_bus, + config.advertising); + if (config.interface == PHY_INTERFACE_MODE_NA) { + phylink_err(pl, + "selection of interface failed, advertisement %*pb\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, + config.advertising); + return -EINVAL; + } + + /* Revalidate with the selected interface */ + linkmode_copy(support, pl->supported); + if (phylink_validate(pl, support, &config)) { + phylink_err(pl, "validation of %s/%s with support %*pb failed\n", + phylink_an_mode_str(pl->cur_link_an_mode), + phy_modes(config.interface), + __ETHTOOL_LINK_MODE_MASK_NBITS, support); + return -EINVAL; + } + } + mutex_lock(&pl->state_mutex); pl->link_config.speed = config.speed; pl->link_config.duplex = config.duplex; @@ -2186,7 +2212,9 @@ static int phylink_sfp_config(struct phylink *pl, u8 mode, if (phy_interface_mode_is_8023z(iface) && pl->phydev) return -EINVAL; - changed = !linkmode_equal(pl->supported, support); + changed = !linkmode_equal(pl->supported, support) || + !linkmode_equal(pl->link_config.advertising, + config.advertising); if (changed) { linkmode_copy(pl->supported, support); linkmode_copy(pl->link_config.advertising, config.advertising); diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index f6b92efffc94..480bcd1f6c1c 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -34,6 +34,8 @@ obj-$(CONFIG_SLIC_DS26522) += slic_ds26522.o clean-files := wanxlfw.inc $(obj)/wanxl.o: $(obj)/wanxlfw.inc +CROSS_COMPILE_M68K = m68k-linux-gnu- + ifeq ($(CONFIG_WANXL_BUILD_FIRMWARE),y) ifeq ($(ARCH),m68k) M68KCC = $(CC) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8679a108f571..6600e138945e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -116,6 +116,8 @@ static struct class *nvme_ns_chr_class; static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); +static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, + struct nvme_command *cmd); /* * Prepare a queue for teardown. @@ -1152,7 +1154,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) +static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { nvme_unfreeze(ctrl); @@ -1167,6 +1170,26 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + + switch (cmd->common.opcode) { + case nvme_admin_set_features: + switch (le32_to_cpu(cmd->common.cdw10) & 0xFF) { + case NVME_FEAT_KATO: + /* + * Keep alive commands interval on the host should be + * updated when KATO is modified by Set Features + * commands. + */ + if (!status) + nvme_update_keep_alive(ctrl, cmd); + break; + default: + break; + } + break; + default: + break; + } } int nvme_execute_passthru_rq(struct request *rq) @@ -1181,7 +1204,7 @@ int nvme_execute_passthru_rq(struct request *rq) effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); ret = nvme_execute_rq(disk, rq, false); if (effects) /* nothing to be done for zero cmd effects */ - nvme_passthru_end(ctrl, effects); + nvme_passthru_end(ctrl, effects, cmd, ret); return ret; } @@ -1269,6 +1292,21 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); +static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, + struct nvme_command *cmd) +{ + unsigned int new_kato = + DIV_ROUND_UP(le32_to_cpu(cmd->common.cdw11), 1000); + + dev_info(ctrl->device, + "keep alive interval updated from %u ms to %u ms\n", + ctrl->kato * 1000 / 2, new_kato * 1000 / 2); + + nvme_stop_keep_alive(ctrl); + ctrl->kato = new_kato; + nvme_start_keep_alive(ctrl); +} + /* * In NVMe 1.0 the CNS field was just a binary controller or namespace * flag, thus sending any new CNS opcodes has a big chance of not working. @@ -1302,11 +1340,6 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } -static bool nvme_multi_css(struct nvme_ctrl *ctrl) -{ - return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; -} - static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids, struct nvme_ns_id_desc *cur, bool *csi_seen) { @@ -1874,6 +1907,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) goto out_unfreeze; } + set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); if (blk_queue_is_zoned(ns->queue)) { @@ -1885,6 +1919,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); + nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); disk_update_readahead(ns->head->disk); @@ -3489,7 +3524,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && nvme_tryget_ns_head(h)) + if (h->ns_id != nsid) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; } @@ -3763,7 +3800,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, nvme_get_ctrl(ctrl); - device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups); + if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups)) + goto out_cleanup_ns_from_list; + if (!nvme_ns_head_multipath(ns->head)) nvme_add_ns_cdev(ns); @@ -3773,6 +3812,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, return; + out_cleanup_ns_from_list: + nvme_put_ctrl(ctrl); + down_write(&ctrl->namespaces_rwsem); + list_del_init(&ns->list); + up_write(&ctrl->namespaces_rwsem); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); @@ -3795,35 +3839,34 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; + clear_bit(NVME_NS_READY, &ns->flags); set_capacity(ns->disk, 0); nvme_fault_inject_fini(&ns->fault_inject); mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } mutex_unlock(&ns->ctrl->subsys->lock); - synchronize_rcu(); /* guarantee not available in head->list */ - nvme_mpath_clear_current_path(ns); - synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ + /* guarantee not available in head->list */ + synchronize_rcu(); + + /* wait for concurrent submissions */ + if (nvme_mpath_clear_current_path(ns)) + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - /* Synchronize with nvme_init_ns_head() */ - mutex_lock(&ns->head->subsys->lock); - if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); - last_path = true; - } - mutex_unlock(&ns->head->subsys->lock); if (last_path) nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 37ce3e8b1db2..e8ccdd398f78 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) mutex_unlock(&ctrl->scan_lock); } +void nvme_mpath_revalidate_paths(struct nvme_ns *ns) +{ + struct nvme_ns_head *head = ns->head; + sector_t capacity = get_capacity(head->disk); + int node; + + list_for_each_entry_rcu(ns, &head->list, siblings) { + if (capacity != get_capacity(ns->disk)) + clear_bit(NVME_NS_READY, &ns->flags); + } + + for_each_node(node) + rcu_assign_pointer(head->current_path[node], NULL); +} + static bool nvme_path_is_disabled(struct nvme_ns *ns) { /* @@ -158,7 +173,7 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns) ns->ctrl->state != NVME_CTRL_DELETING) return true; if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || - test_bit(NVME_NS_REMOVING, &ns->flags)) + !test_bit(NVME_NS_READY, &ns->flags)) return true; return false; } @@ -465,6 +480,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ctrl->subsys->instance, head->instance); blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); + /* set to a default value of 512 until the disk is validated */ blk_queue_logical_block_size(head->disk->queue, 512); blk_set_stacking_limits(&head->disk->queue->limits); @@ -583,14 +600,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - unsigned nsid = le32_to_cpu(desc->nsids[n]); - + unsigned nsid; +again: + nsid = le32_to_cpu(desc->nsids[n]); if (ns->head->ns_id < nsid) continue; if (ns->head->ns_id == nsid) nvme_update_ns_ana_state(desc, ns); if (++n == nr_nsids) break; + if (ns->head->ns_id > nsid) + goto again; } up_read(&ctrl->namespaces_rwsem); return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2e1f298b217..9871c0c9374c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -456,6 +456,7 @@ struct nvme_ns { #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 #define NVME_NS_FORCE_RO 3 +#define NVME_NS_READY 4 struct cdev cdev; struct device cdev_device; @@ -748,6 +749,7 @@ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); +void nvme_mpath_revalidate_paths(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); @@ -795,6 +797,9 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { return false; } +static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns) +{ +} static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } @@ -887,4 +892,9 @@ struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); +static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) +{ + return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI; +} + #endif /* _NVME_H */ diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a68704e39084..042c594bc57e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -656,8 +656,8 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); mutex_destroy(&queue->queue_lock); } @@ -1815,14 +1815,10 @@ static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) for (i = 0; i < queue->queue_size; i++) { ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]); if (ret) - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue, @@ -1916,14 +1912,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) if (ret) { dev_err(ctrl->ctrl.device, "rdma_connect_locked failed (%d).\n", ret); - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, @@ -1954,8 +1946,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - nvme_rdma_destroy_queue_ib(queue); - fallthrough; case RDMA_CM_EVENT_ADDR_ERROR: dev_dbg(queue->ctrl->ctrl.device, "CM error event %d\n", ev->event); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 645025620154..e4249b7dc056 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -45,6 +45,7 @@ struct nvme_tcp_request { u32 pdu_len; u32 pdu_sent; u16 ttag; + __le16 status; struct list_head entry; struct llist_node lentry; __le32 ddgst; @@ -273,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !list_empty(&queue->send_list) || + !llist_empty(&queue->req_list) || queue->more_requests; +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -293,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); - } else if (last) { - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } + + if (last && nvme_tcp_queue_more(queue)) + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) @@ -485,6 +493,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, struct nvme_completion *cqe) { + struct nvme_tcp_request *req; struct request *rq; rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id); @@ -496,7 +505,11 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, return -EINVAL; } - if (!nvme_try_complete_req(rq, cqe->status, cqe->result)) + req = blk_mq_rq_to_pdu(rq); + if (req->status == cpu_to_le16(NVME_SC_SUCCESS)) + req->status = cqe->status; + + if (!nvme_try_complete_req(rq, req->status, cqe->result)) nvme_complete_rq(rq); queue->nr_cqe++; @@ -758,7 +771,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; } else { if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { - nvme_tcp_end_request(rq, NVME_SC_SUCCESS); + nvme_tcp_end_request(rq, + le16_to_cpu(req->status)); queue->nr_cqe++; } nvme_tcp_init_recv_ctx(queue); @@ -788,18 +802,24 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, return 0; if (queue->recv_ddgst != queue->exp_ddgst) { + struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), + pdu->command_id); + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + + req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR); + dev_err(queue->ctrl->ctrl.device, "data digest error: recv %#x expected %#x\n", le32_to_cpu(queue->recv_ddgst), le32_to_cpu(queue->exp_ddgst)); - return -EIO; } if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id); + struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); - nvme_tcp_end_request(rq, NVME_SC_SUCCESS); + nvme_tcp_end_request(rq, le16_to_cpu(req->status)); queue->nr_cqe++; } @@ -893,12 +913,6 @@ done: read_unlock_bh(&sk->sk_callback_lock); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) -{ - return !list_empty(&queue->send_list) || - !llist_empty(&queue->req_list) || queue->more_requests; -} - static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) { queue->request = NULL; @@ -1132,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w) pending = true; else if (unlikely(result < 0)) break; - } else - pending = !llist_empty(&queue->req_list); + } result = nvme_tcp_try_recv(queue); if (result > 0) @@ -2293,6 +2306,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, return ret; req->state = NVME_TCP_SEND_CMD_PDU; + req->status = cpu_to_le16(NVME_SC_SUCCESS); req->offset = 0; req->data_sent = 0; req->pdu_len = 0; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 0cb98f2bbc8c..aa6d84d8848e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1015,7 +1015,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) if (unlikely(ret)) return ret; - if (nvmet_req_passthru_ctrl(req)) + if (nvmet_is_passthru_req(req)) return nvmet_parse_passthru_admin_cmd(req); switch (cmd->common.opcode) { diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 273555127188..be5d82421e3a 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1028,7 +1028,7 @@ nvmet_subsys_attr_version_store_locked(struct nvmet_subsys *subsys, } /* passthru subsystems use the underlying controller's version */ - if (nvmet_passthru_ctrl(subsys)) + if (nvmet_is_passthru_subsys(subsys)) return -EINVAL; ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); @@ -1067,7 +1067,8 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, { struct nvmet_subsys *subsys = to_subsys(item); - return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial); + return snprintf(page, PAGE_SIZE, "%.*s\n", + NVMET_SN_MAX_SIZE, subsys->serial); } static ssize_t diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 66d05eecc2a9..b8425fa34300 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -553,7 +553,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) mutex_lock(&subsys->lock); ret = 0; - if (nvmet_passthru_ctrl(subsys)) { + if (nvmet_is_passthru_subsys(subsys)) { pr_info("cannot enable both passthru and regular namespaces for a single subsystem"); goto out_unlock; } @@ -869,7 +869,7 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) if (unlikely(ret)) return ret; - if (nvmet_req_passthru_ctrl(req)) + if (nvmet_is_passthru_req(req)) return nvmet_parse_passthru_io_cmd(req); ret = nvmet_req_find_ns(req); @@ -1206,6 +1206,9 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ctrl->cap |= (15ULL << 24); /* maximum queue entries supported: */ ctrl->cap |= NVMET_QUEUE_SIZE - 1; + + if (nvmet_is_passthru_subsys(ctrl->subsys)) + nvmet_passthrough_override_cap(ctrl); } struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, @@ -1363,8 +1366,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, goto out_put_subsystem; mutex_init(&ctrl->lock); - nvmet_init_cap(ctrl); - ctrl->port = req->port; INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); @@ -1378,6 +1379,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, kref_init(&ctrl->ref); ctrl->subsys = subsys; + nvmet_init_cap(ctrl); WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 06dd3d537f07..7143c7fa7464 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -582,7 +582,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys); void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys); u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req); u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req); -static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys) +static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys) { return subsys->passthru_ctrl; } @@ -601,18 +601,19 @@ static inline u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req) { return 0; } -static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys) +static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys) { return NULL; } #endif /* CONFIG_NVME_TARGET_PASSTHRU */ -static inline struct nvme_ctrl * -nvmet_req_passthru_ctrl(struct nvmet_req *req) +static inline bool nvmet_is_passthru_req(struct nvmet_req *req) { - return nvmet_passthru_ctrl(nvmet_req_subsys(req)); + return nvmet_is_passthru_subsys(nvmet_req_subsys(req)); } +void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl); + u16 errno_to_nvme_status(struct nvmet_req *req, int errno); u16 nvmet_report_invalid_opcode(struct nvmet_req *req); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 225cd1ffbe45..f0efb3537989 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -20,6 +20,16 @@ MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU); */ static DEFINE_XARRAY(passthru_subsystems); +void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl) +{ + /* + * Multiple command set support can only be declared if the underlying + * controller actually supports it. + */ + if (!nvme_multi_css(ctrl->subsys->passthru_ctrl)) + ctrl->cap &= ~(1ULL << 43); +} + static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -218,7 +228,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) static void nvmet_passthru_execute_cmd(struct nvmet_req *req) { - struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req); + struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; struct request_queue *q = ctrl->admin_q; struct nvme_ns *ns = NULL; struct request *rq = NULL; @@ -299,7 +309,7 @@ out: */ static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req) { - struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req); + struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl; struct nvme_feat_host_behavior *host; u16 status = NVME_SC_INTERNAL; int ret; diff --git a/drivers/of/device.c b/drivers/of/device.c index 5b043ee30824..b0800c260f64 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -85,7 +85,11 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) break; } - if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) + /* + * Attempt to initialize a restricted-dma-pool region if one was found. + * Note that count can hold a negative error code. + */ + if (i < count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n"); } diff --git a/drivers/of/property.c b/drivers/of/property.c index 0c0dc2e369c0..a3483484a5a2 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1291,7 +1291,6 @@ DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) -DEFINE_SIMPLE_PROP(phy_handle, "phy-handle", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1380,7 +1379,6 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, - { .parse_prop = parse_phy_handle, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, @@ -1444,6 +1442,9 @@ static int of_fwnode_add_links(struct fwnode_handle *fwnode) struct property *p; struct device_node *con_np = to_of_node(fwnode); + if (IS_ENABLED(CONFIG_X86)) + return 0; + if (!con_np) return -EINVAL; diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 889d7ce282eb..952a92504df6 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -156,15 +156,6 @@ static inline struct dino_device *DINO_DEV(struct pci_hba_data *hba) return container_of(hba, struct dino_device, hba); } -/* Check if PCI device is behind a Card-mode Dino. */ -static int pci_dev_is_behind_card_dino(struct pci_dev *dev) -{ - struct dino_device *dino_dev; - - dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); - return is_card_dino(&dino_dev->hba.dev->id); -} - /* * Dino Configuration Space Accessor Functions */ @@ -447,6 +438,15 @@ static void quirk_cirrus_cardbus(struct pci_dev *dev) DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_6832, quirk_cirrus_cardbus ); #ifdef CONFIG_TULIP +/* Check if PCI device is behind a Card-mode Dino. */ +static int pci_dev_is_behind_card_dino(struct pci_dev *dev) +{ + struct dino_device *dino_dev; + + dino_dev = DINO_DEV(parisc_walk_tree(dev->bus->bridge)); + return is_card_dino(&dino_dev->hba.dev->id); +} + static void pci_fixup_tulip(struct pci_dev *dev) { if (!pci_dev_is_behind_card_dino(dev)) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a1b1e2a01632..0f40943a9a18 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -937,7 +937,7 @@ static struct acpi_device *acpi_pci_find_companion(struct device *dev); void pci_set_acpi_fwnode(struct pci_dev *dev) { - if (!ACPI_COMPANION(&dev->dev) && !pci_dev_is_added(dev)) + if (!dev_fwnode(&dev->dev) && !pci_dev_is_added(dev)) ACPI_COMPANION_SET(&dev->dev, acpi_pci_find_companion(&dev->dev)); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e5089af8ad90..4537d1ea14fd 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5435,7 +5435,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_MULTIMEDIA_HD_AUDIO, 8, quirk_gpu_hda); /* - * Create device link for NVIDIA GPU with integrated USB xHCI Host + * Create device link for GPUs with integrated USB xHCI Host * controller to VGA. */ static void quirk_gpu_usb(struct pci_dev *usb) @@ -5444,9 +5444,11 @@ static void quirk_gpu_usb(struct pci_dev *usb) } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_USB, 8, quirk_gpu_usb); /* - * Create device link for NVIDIA GPU with integrated Type-C UCSI controller + * Create device link for GPUs with integrated Type-C UCSI controller * to VGA. Currently there is no class code defined for UCSI device over PCI * so using UNKNOWN class for now and it will be updated when UCSI * over PCI gets a class code. @@ -5459,6 +5461,9 @@ static void quirk_gpu_usb_typec_ucsi(struct pci_dev *ucsi) DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_SERIAL_UNKNOWN, 8, quirk_gpu_usb_typec_ucsi); +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, + PCI_CLASS_SERIAL_UNKNOWN, 8, + quirk_gpu_usb_typec_ucsi); /* * Enable the NVIDIA GPU integrated HDA controller if the BIOS left it diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c index 25557b272a4f..4be24890132e 100644 --- a/drivers/pci/vpd.c +++ b/drivers/pci/vpd.c @@ -99,6 +99,24 @@ error: return off ?: PCI_VPD_SZ_INVALID; } +static bool pci_vpd_available(struct pci_dev *dev) +{ + struct pci_vpd *vpd = &dev->vpd; + + if (!vpd->cap) + return false; + + if (vpd->len == 0) { + vpd->len = pci_vpd_size(dev); + if (vpd->len == PCI_VPD_SZ_INVALID) { + vpd->cap = 0; + return false; + } + } + + return true; +} + /* * Wait for last operation to complete. * This code has to spin since there is no other notification from the PCI @@ -145,7 +163,7 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count, loff_t end = pos + count; u8 *buf = arg; - if (!vpd->cap) + if (!pci_vpd_available(dev)) return -ENODEV; if (pos < 0) @@ -206,7 +224,7 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count, loff_t end = pos + count; int ret = 0; - if (!vpd->cap) + if (!pci_vpd_available(dev)) return -ENODEV; if (pos < 0 || (pos & 3) || (count & 3)) @@ -242,14 +260,11 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count, void pci_vpd_init(struct pci_dev *dev) { + if (dev->vpd.len == PCI_VPD_SZ_INVALID) + return; + dev->vpd.cap = pci_find_capability(dev, PCI_CAP_ID_VPD); mutex_init(&dev->vpd.lock); - - if (!dev->vpd.len) - dev->vpd.len = pci_vpd_size(dev); - - if (dev->vpd.len == PCI_VPD_SZ_INVALID) - dev->vpd.cap = 0; } static ssize_t vpd_read(struct file *filp, struct kobject *kobj, @@ -294,13 +309,14 @@ const struct attribute_group pci_dev_vpd_attr_group = { void *pci_vpd_alloc(struct pci_dev *dev, unsigned int *size) { - unsigned int len = dev->vpd.len; + unsigned int len; void *buf; int cnt; - if (!dev->vpd.cap) + if (!pci_vpd_available(dev)) return ERR_PTR(-ENODEV); + len = dev->vpd.len; buf = kmalloc(len, GFP_KERNEL); if (!buf) return ERR_PTR(-ENOMEM); diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index c76adedd58c9..aa29841bbb79 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -272,7 +272,7 @@ config PWM_IQS620A config PWM_JZ4740 tristate "Ingenic JZ47xx PWM support" - depends on MIPS + depends on MIPS || COMPILE_TEST depends on COMMON_CLK select MFD_SYSCON help @@ -284,7 +284,8 @@ config PWM_JZ4740 config PWM_KEEMBAY tristate "Intel Keem Bay PWM driver" - depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST) + depends on ARCH_KEEMBAY || COMPILE_TEST + depends on COMMON_CLK && HAS_IOMEM help The platform driver for Intel Keem Bay PWM controller. diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 35e894f4a379..4527f09a5c50 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -304,7 +304,7 @@ EXPORT_SYMBOL_GPL(pwmchip_add); * * Returns: 0 on success or a negative error code on failure. */ -int pwmchip_remove(struct pwm_chip *chip) +void pwmchip_remove(struct pwm_chip *chip) { pwmchip_sysfs_unexport(chip); @@ -318,8 +318,6 @@ int pwmchip_remove(struct pwm_chip *chip) free_pwms(chip); mutex_unlock(&pwm_lock); - - return 0; } EXPORT_SYMBOL_GPL(pwmchip_remove); diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c index e2a26d9da25b..ad37bc46f272 100644 --- a/drivers/pwm/pwm-ab8500.c +++ b/drivers/pwm/pwm-ab8500.c @@ -22,14 +22,21 @@ struct ab8500_pwm_chip { struct pwm_chip chip; + unsigned int hwid; }; +static struct ab8500_pwm_chip *ab8500_pwm_from_chip(struct pwm_chip *chip) +{ + return container_of(chip, struct ab8500_pwm_chip, chip); +} + static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state) { int ret; u8 reg; unsigned int higher_val, lower_val; + struct ab8500_pwm_chip *ab8500 = ab8500_pwm_from_chip(chip); if (state->polarity != PWM_POLARITY_NORMAL) return -EINVAL; @@ -37,7 +44,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (!state->enabled) { ret = abx500_mask_and_set_register_interruptible(chip->dev, AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG, - 1 << (chip->base - 1), 0); + 1 << ab8500->hwid, 0); if (ret < 0) dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n", @@ -56,7 +63,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, */ higher_val = ((state->duty_cycle & 0x0300) >> 8); - reg = AB8500_PWM_OUT_CTRL1_REG + ((chip->base - 1) * 2); + reg = AB8500_PWM_OUT_CTRL1_REG + (ab8500->hwid * 2); ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC, reg, (u8)lower_val); @@ -70,7 +77,7 @@ static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, ret = abx500_mask_and_set_register_interruptible(chip->dev, AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG, - 1 << (chip->base - 1), 1 << (chip->base - 1)); + 1 << ab8500->hwid, 1 << ab8500->hwid); if (ret < 0) dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n", pwm->label, ret); @@ -88,6 +95,9 @@ static int ab8500_pwm_probe(struct platform_device *pdev) struct ab8500_pwm_chip *ab8500; int err; + if (pdev->id < 1 || pdev->id > 31) + return dev_err_probe(&pdev->dev, EINVAL, "Invalid device id %d\n", pdev->id); + /* * Nothing to be done in probe, this is required to get the * device which is required for ab8500 read and write @@ -99,27 +109,13 @@ static int ab8500_pwm_probe(struct platform_device *pdev) ab8500->chip.dev = &pdev->dev; ab8500->chip.ops = &ab8500_pwm_ops; ab8500->chip.npwm = 1; + ab8500->hwid = pdev->id - 1; - err = pwmchip_add(&ab8500->chip); + err = devm_pwmchip_add(&pdev->dev, &ab8500->chip); if (err < 0) return dev_err_probe(&pdev->dev, err, "Failed to add pwm chip\n"); dev_dbg(&pdev->dev, "pwm probe successful\n"); - platform_set_drvdata(pdev, ab8500); - - return 0; -} - -static int ab8500_pwm_remove(struct platform_device *pdev) -{ - struct ab8500_pwm_chip *ab8500 = platform_get_drvdata(pdev); - int err; - - err = pwmchip_remove(&ab8500->chip); - if (err < 0) - return err; - - dev_dbg(&pdev->dev, "pwm driver removed\n"); return 0; } @@ -129,7 +125,6 @@ static struct platform_driver ab8500_pwm_driver = { .name = "ab8500-pwm", }, .probe = ab8500_pwm_probe, - .remove = ab8500_pwm_remove, }; module_platform_driver(ab8500_pwm_driver); diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 4459325d3650..a43b2babc809 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -281,11 +281,8 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) static int atmel_hlcdc_pwm_remove(struct platform_device *pdev) { struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&chip->chip); - if (ret) - return ret; + pwmchip_remove(&chip->chip); clk_disable_unprepare(chip->hlcdc->periph_clk); diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index bf398f21484d..36f7ea381838 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -503,11 +503,8 @@ err_slow_clk: static int atmel_tcb_pwm_remove(struct platform_device *pdev) { struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev); - int err; - err = pwmchip_remove(&tcbpwm->chip); - if (err < 0) - return err; + pwmchip_remove(&tcbpwm->chip); clk_disable_unprepare(tcbpwm->slow_clk); clk_put(tcbpwm->slow_clk); diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index a8162bae3e8a..e748604403cc 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -84,9 +84,19 @@ struct atmel_pwm_chip { void __iomem *base; const struct atmel_pwm_data *data; - unsigned int updated_pwms; - /* ISR is cleared when read, ensure only one thread does that */ - struct mutex isr_lock; + /* + * The hardware supports a mechanism to update a channel's duty cycle at + * the end of the currently running period. When such an update is + * pending we delay disabling the PWM until the new configuration is + * active because otherwise pmw_config(duty_cycle=0); pwm_disable(); + * might not result in an inactive output. + * This bitmask tracks for which channels an update is pending in + * hardware. + */ + u32 update_pending; + + /* Protects .update_pending */ + spinlock_t lock; }; static inline struct atmel_pwm_chip *to_atmel_pwm_chip(struct pwm_chip *chip) @@ -123,6 +133,64 @@ static inline void atmel_pwm_ch_writel(struct atmel_pwm_chip *chip, atmel_pwm_writel(chip, base + offset, val); } +static void atmel_pwm_update_pending(struct atmel_pwm_chip *chip) +{ + /* + * Each channel that has its bit in ISR set started a new period since + * ISR was cleared and so there is no more update pending. Note that + * reading ISR clears it, so this needs to handle all channels to not + * loose information. + */ + u32 isr = atmel_pwm_readl(chip, PWM_ISR); + + chip->update_pending &= ~isr; +} + +static void atmel_pwm_set_pending(struct atmel_pwm_chip *chip, unsigned int ch) +{ + spin_lock(&chip->lock); + + /* + * Clear pending flags in hardware because otherwise there might still + * be a stale flag in ISR. + */ + atmel_pwm_update_pending(chip); + + chip->update_pending |= (1 << ch); + + spin_unlock(&chip->lock); +} + +static int atmel_pwm_test_pending(struct atmel_pwm_chip *chip, unsigned int ch) +{ + int ret = 0; + + spin_lock(&chip->lock); + + if (chip->update_pending & (1 << ch)) { + atmel_pwm_update_pending(chip); + + if (chip->update_pending & (1 << ch)) + ret = 1; + } + + spin_unlock(&chip->lock); + + return ret; +} + +static int atmel_pwm_wait_nonpending(struct atmel_pwm_chip *chip, unsigned int ch) +{ + unsigned long timeout = jiffies + 2 * HZ; + int ret; + + while ((ret = atmel_pwm_test_pending(chip, ch)) && + time_before(jiffies, timeout)) + usleep_range(10, 100); + + return ret ? -ETIMEDOUT : 0; +} + static int atmel_pwm_calculate_cprd_and_pres(struct pwm_chip *chip, unsigned long clkrate, const struct pwm_state *state, @@ -185,6 +253,7 @@ static void atmel_pwm_update_cdty(struct pwm_chip *chip, struct pwm_device *pwm, atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, atmel_pwm->data->regs.duty_upd, cdty); + atmel_pwm_set_pending(atmel_pwm, pwm->hwpwm); } static void atmel_pwm_set_cprd_cdty(struct pwm_chip *chip, @@ -205,20 +274,8 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm, struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip); unsigned long timeout = jiffies + 2 * HZ; - /* - * Wait for at least a complete period to have passed before disabling a - * channel to be sure that CDTY has been updated - */ - mutex_lock(&atmel_pwm->isr_lock); - atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR); - - while (!(atmel_pwm->updated_pwms & (1 << pwm->hwpwm)) && - time_before(jiffies, timeout)) { - usleep_range(10, 100); - atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR); - } + atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm); - mutex_unlock(&atmel_pwm->isr_lock); atmel_pwm_writel(atmel_pwm, PWM_DIS, 1 << pwm->hwpwm); /* @@ -292,10 +349,6 @@ static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, val |= PWM_CMR_CPOL; atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val); atmel_pwm_set_cprd_cdty(chip, pwm, cprd, cdty); - mutex_lock(&atmel_pwm->isr_lock); - atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR); - atmel_pwm->updated_pwms &= ~(1 << pwm->hwpwm); - mutex_unlock(&atmel_pwm->isr_lock); atmel_pwm_writel(atmel_pwm, PWM_ENA, 1 << pwm->hwpwm); } else if (cstate.enabled) { atmel_pwm_disable(chip, pwm, true); @@ -326,6 +379,9 @@ static void atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, tmp <<= pres; state->period = DIV64_U64_ROUND_UP(tmp, rate); + /* Wait for an updated duty_cycle queued in hardware */ + atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm); + cdty = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, atmel_pwm->data->regs.duty); tmp = (u64)(cprd - cdty) * NSEC_PER_SEC; @@ -416,9 +472,10 @@ static int atmel_pwm_probe(struct platform_device *pdev) if (!atmel_pwm) return -ENOMEM; - mutex_init(&atmel_pwm->isr_lock); atmel_pwm->data = of_device_get_match_data(&pdev->dev); - atmel_pwm->updated_pwms = 0; + + atmel_pwm->update_pending = 0; + spin_lock_init(&atmel_pwm->lock); atmel_pwm->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(atmel_pwm->base)) @@ -460,7 +517,6 @@ static int atmel_pwm_remove(struct platform_device *pdev) pwmchip_remove(&atmel_pwm->chip); clk_unprepare(atmel_pwm->clk); - mutex_destroy(&atmel_pwm->isr_lock); return 0; } diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index 8c85c66ea5c9..64148f5f81d0 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -267,8 +267,6 @@ static int kona_pwmc_probe(struct platform_device *pdev) if (kp == NULL) return -ENOMEM; - platform_set_drvdata(pdev, kp); - kp->chip.dev = &pdev->dev; kp->chip.ops = &kona_pwm_ops; kp->chip.npwm = 6; @@ -298,20 +296,13 @@ static int kona_pwmc_probe(struct platform_device *pdev) clk_disable_unprepare(kp->clk); - ret = pwmchip_add(&kp->chip); + ret = devm_pwmchip_add(&pdev->dev, &kp->chip); if (ret < 0) dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); return ret; } -static int kona_pwmc_remove(struct platform_device *pdev) -{ - struct kona_pwmc *kp = platform_get_drvdata(pdev); - - return pwmchip_remove(&kp->chip); -} - static const struct of_device_id bcm_kona_pwmc_dt[] = { { .compatible = "brcm,kona-pwm" }, { }, @@ -324,7 +315,6 @@ static struct platform_driver kona_pwmc_driver = { .of_match_table = bcm_kona_pwmc_dt, }, .probe = kona_pwmc_probe, - .remove = kona_pwmc_remove, }; module_platform_driver(kona_pwmc_driver); diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index 8b1d1e7aa856..3b529f82b97c 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -282,12 +282,11 @@ out_clk: static int brcmstb_pwm_remove(struct platform_device *pdev) { struct brcmstb_pwm *p = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&p->chip); + pwmchip_remove(&p->chip); clk_disable_unprepare(p->clk); - return ret; + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 9fffb566af5f..5e29d9c682c3 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -280,7 +280,9 @@ static int cros_ec_pwm_remove(struct platform_device *dev) struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev); struct pwm_chip *chip = &ec_pwm->chip; - return pwmchip_remove(chip); + pwmchip_remove(chip); + + return 0; } #ifdef CONFIG_OF diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index fc3cb7d669c6..c45a75e65c86 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -183,27 +183,18 @@ static int ep93xx_pwm_probe(struct platform_device *pdev) ep93xx_pwm->chip.ops = &ep93xx_pwm_ops; ep93xx_pwm->chip.npwm = 1; - ret = pwmchip_add(&ep93xx_pwm->chip); + ret = devm_pwmchip_add(&pdev->dev, &ep93xx_pwm->chip); if (ret < 0) return ret; - platform_set_drvdata(pdev, ep93xx_pwm); return 0; } -static int ep93xx_pwm_remove(struct platform_device *pdev) -{ - struct ep93xx_pwm *ep93xx_pwm = platform_get_drvdata(pdev); - - return pwmchip_remove(&ep93xx_pwm->chip); -} - static struct platform_driver ep93xx_pwm_driver = { .driver = { .name = "ep93xx-pwm", }, .probe = ep93xx_pwm_probe, - .remove = ep93xx_pwm_remove, }; module_platform_driver(ep93xx_pwm_driver); diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 96ccd772280c..0247757f9a72 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -453,7 +453,7 @@ static int fsl_pwm_probe(struct platform_device *pdev) fpc->chip.ops = &fsl_pwm_ops; fpc->chip.npwm = 8; - ret = pwmchip_add(&fpc->chip); + ret = devm_pwmchip_add(&pdev->dev, &fpc->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret); return ret; @@ -464,13 +464,6 @@ static int fsl_pwm_probe(struct platform_device *pdev) return fsl_pwm_init(fpc); } -static int fsl_pwm_remove(struct platform_device *pdev) -{ - struct fsl_pwm_chip *fpc = platform_get_drvdata(pdev); - - return pwmchip_remove(&fpc->chip); -} - #ifdef CONFIG_PM_SLEEP static int fsl_pwm_suspend(struct device *dev) { @@ -552,7 +545,6 @@ static struct platform_driver fsl_pwm_driver = { .pm = &fsl_pwm_pm_ops, }, .probe = fsl_pwm_probe, - .remove = fsl_pwm_remove, }; module_platform_driver(fsl_pwm_driver); diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index 4a6e9ad3c0ff..333f1b18ff4e 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -248,13 +248,15 @@ static int hibvt_pwm_remove(struct platform_device *pdev) pwm_chip = platform_get_drvdata(pdev); + pwmchip_remove(&pwm_chip->chip); + reset_control_assert(pwm_chip->rstc); msleep(30); reset_control_deassert(pwm_chip->rstc); clk_disable_unprepare(pwm_chip->clk); - return pwmchip_remove(&pwm_chip->chip); + return 0; } static const struct of_device_id hibvt_pwm_of_match[] = { diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c index 11b16ecc4f96..f97f82548293 100644 --- a/drivers/pwm/pwm-img.c +++ b/drivers/pwm/pwm-img.c @@ -326,28 +326,14 @@ err_pm_disable: static int img_pwm_remove(struct platform_device *pdev) { struct img_pwm_chip *pwm_chip = platform_get_drvdata(pdev); - u32 val; - unsigned int i; - int ret; - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put(&pdev->dev); - return ret; - } - - for (i = 0; i < pwm_chip->chip.npwm; i++) { - val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG); - val &= ~BIT(i); - img_pwm_writel(pwm_chip, PWM_CTRL_CFG, val); - } - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) img_pwm_runtime_suspend(&pdev->dev); - return pwmchip_remove(&pwm_chip->chip); + pwmchip_remove(&pwm_chip->chip); + + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index dbb50493abdd..e5e7b7c339a8 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -382,11 +382,12 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev) static int pwm_imx_tpm_remove(struct platform_device *pdev) { struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev); - int ret = pwmchip_remove(&tpm->chip); + + pwmchip_remove(&tpm->chip); clk_disable_unprepare(tpm->clk); - return ret; + return 0; } static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev) diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c index f6588a96fbd9..ea91a2f81a9f 100644 --- a/drivers/pwm/pwm-imx27.c +++ b/drivers/pwm/pwm-imx27.c @@ -313,8 +313,6 @@ static int pwm_imx27_probe(struct platform_device *pdev) if (imx == NULL) return -ENOMEM; - platform_set_drvdata(pdev, imx); - imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(imx->clk_ipg)) return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_ipg), @@ -342,16 +340,7 @@ static int pwm_imx27_probe(struct platform_device *pdev) if (!(pwmcr & MX3_PWMCR_EN)) pwm_imx27_clk_disable_unprepare(imx); - return pwmchip_add(&imx->chip); -} - -static int pwm_imx27_remove(struct platform_device *pdev) -{ - struct pwm_imx27_chip *imx; - - imx = platform_get_drvdata(pdev); - - return pwmchip_remove(&imx->chip); + return devm_pwmchip_add(&pdev->dev, &imx->chip); } static struct platform_driver imx_pwm_driver = { @@ -360,7 +349,6 @@ static struct platform_driver imx_pwm_driver = { .of_match_table = pwm_imx27_dt_ids, }, .probe = pwm_imx27_probe, - .remove = pwm_imx27_remove, }; module_platform_driver(imx_pwm_driver); diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c index 015f5eba09a1..b66c35074087 100644 --- a/drivers/pwm/pwm-intel-lgm.c +++ b/drivers/pwm/pwm-intel-lgm.c @@ -176,8 +176,6 @@ static int lgm_pwm_probe(struct platform_device *pdev) if (!pc) return -ENOMEM; - platform_set_drvdata(pdev, pc); - io_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(io_base)) return PTR_ERR(io_base); @@ -210,20 +208,13 @@ static int lgm_pwm_probe(struct platform_device *pdev) lgm_pwm_init(pc); - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(dev, &pc->chip); if (ret < 0) return dev_err_probe(dev, ret, "failed to add PWM chip\n"); return 0; } -static int lgm_pwm_remove(struct platform_device *pdev) -{ - struct lgm_pwm_chip *pc = platform_get_drvdata(pdev); - - return pwmchip_remove(&pc->chip); -} - static const struct of_device_id lgm_pwm_of_match[] = { { .compatible = "intel,lgm-pwm" }, { } @@ -236,7 +227,6 @@ static struct platform_driver lgm_pwm_driver = { .of_match_table = lgm_pwm_of_match, }, .probe = lgm_pwm_probe, - .remove = lgm_pwm_remove, }; module_platform_driver(lgm_pwm_driver); diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 6c6e26d18329..54bd95a5cab0 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -189,7 +189,6 @@ static int iqs620_pwm_probe(struct platform_device *pdev) if (!iqs620_pwm) return -ENOMEM; - platform_set_drvdata(pdev, iqs620_pwm); iqs620_pwm->iqs62x = iqs62x; ret = regmap_read(iqs62x->regmap, IQS620_PWR_SETTINGS, &val); @@ -224,31 +223,18 @@ static int iqs620_pwm_probe(struct platform_device *pdev) if (ret) return ret; - ret = pwmchip_add(&iqs620_pwm->chip); + ret = devm_pwmchip_add(&pdev->dev, &iqs620_pwm->chip); if (ret) dev_err(&pdev->dev, "Failed to add device: %d\n", ret); return ret; } -static int iqs620_pwm_remove(struct platform_device *pdev) -{ - struct iqs620_pwm_private *iqs620_pwm = platform_get_drvdata(pdev); - int ret; - - ret = pwmchip_remove(&iqs620_pwm->chip); - if (ret) - dev_err(&pdev->dev, "Failed to remove device: %d\n", ret); - - return ret; -} - static struct platform_driver iqs620_pwm_platform_driver = { .driver = { .name = "iqs620a-pwm", }, .probe = iqs620_pwm_probe, - .remove = iqs620_pwm_remove, }; module_platform_driver(iqs620_pwm_platform_driver); diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c index 990e7904c7f1..23dc1fb770e2 100644 --- a/drivers/pwm/pwm-jz4740.c +++ b/drivers/pwm/pwm-jz4740.c @@ -245,16 +245,7 @@ static int jz4740_pwm_probe(struct platform_device *pdev) jz4740->chip.ops = &jz4740_pwm_ops; jz4740->chip.npwm = info->num_pwms; - platform_set_drvdata(pdev, jz4740); - - return pwmchip_add(&jz4740->chip); -} - -static int jz4740_pwm_remove(struct platform_device *pdev) -{ - struct jz4740_pwm_chip *jz4740 = platform_get_drvdata(pdev); - - return pwmchip_remove(&jz4740->chip); + return devm_pwmchip_add(dev, &jz4740->chip); } static const struct soc_info __maybe_unused jz4740_soc_info = { @@ -280,7 +271,6 @@ static struct platform_driver jz4740_pwm_driver = { .of_match_table = of_match_ptr(jz4740_pwm_dt_ids), }, .probe = jz4740_pwm_probe, - .remove = jz4740_pwm_remove, }; module_platform_driver(jz4740_pwm_driver); diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c index 521a825c8ba0..733811b05721 100644 --- a/drivers/pwm/pwm-keembay.c +++ b/drivers/pwm/pwm-keembay.c @@ -207,22 +207,13 @@ static int keembay_pwm_probe(struct platform_device *pdev) priv->chip.ops = &keembay_pwm_ops; priv->chip.npwm = KMB_TOTAL_PWM_CHANNELS; - ret = pwmchip_add(&priv->chip); + ret = devm_pwmchip_add(dev, &priv->chip); if (ret) return dev_err_probe(dev, ret, "Failed to add PWM chip\n"); - platform_set_drvdata(pdev, priv); - return 0; } -static int keembay_pwm_remove(struct platform_device *pdev) -{ - struct keembay_pwm *priv = platform_get_drvdata(pdev); - - return pwmchip_remove(&priv->chip); -} - static const struct of_device_id keembay_pwm_of_match[] = { { .compatible = "intel,keembay-pwm" }, { } @@ -231,7 +222,6 @@ MODULE_DEVICE_TABLE(of, keembay_pwm_of_match); static struct platform_driver keembay_pwm_driver = { .probe = keembay_pwm_probe, - .remove = keembay_pwm_remove, .driver = { .name = "pwm-keembay", .of_match_table = keembay_pwm_of_match, diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 7551253ada32..ea17d446a627 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -276,16 +276,7 @@ static int lp3943_pwm_probe(struct platform_device *pdev) lp3943_pwm->chip.ops = &lp3943_pwm_ops; lp3943_pwm->chip.npwm = LP3943_NUM_PWMS; - platform_set_drvdata(pdev, lp3943_pwm); - - return pwmchip_add(&lp3943_pwm->chip); -} - -static int lp3943_pwm_remove(struct platform_device *pdev) -{ - struct lp3943_pwm *lp3943_pwm = platform_get_drvdata(pdev); - - return pwmchip_remove(&lp3943_pwm->chip); + return devm_pwmchip_add(&pdev->dev, &lp3943_pwm->chip); } #ifdef CONFIG_OF @@ -298,7 +289,6 @@ MODULE_DEVICE_TABLE(of, lp3943_pwm_of_match); static struct platform_driver lp3943_pwm_driver = { .probe = lp3943_pwm_probe, - .remove = lp3943_pwm_remove, .driver = { .name = "lp3943-pwm", .of_match_table = of_match_ptr(lp3943_pwm_of_match), diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c index 2834a0f001d3..ddeab5687cb8 100644 --- a/drivers/pwm/pwm-lpc32xx.c +++ b/drivers/pwm/pwm-lpc32xx.c @@ -117,29 +117,20 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev) lpc32xx->chip.ops = &lpc32xx_pwm_ops; lpc32xx->chip.npwm = 1; - ret = pwmchip_add(&lpc32xx->chip); - if (ret < 0) { - dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret); - return ret; - } - - /* When PWM is disable, configure the output to the default value */ + /* If PWM is disabled, configure the output to the default value */ val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); val &= ~PWM_PIN_LEVEL; writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2)); - platform_set_drvdata(pdev, lpc32xx); + ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip); + if (ret < 0) { + dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret); + return ret; + } return 0; } -static int lpc32xx_pwm_remove(struct platform_device *pdev) -{ - struct lpc32xx_pwm_chip *lpc32xx = platform_get_drvdata(pdev); - - return pwmchip_remove(&lpc32xx->chip); -} - static const struct of_device_id lpc32xx_pwm_dt_ids[] = { { .compatible = "nxp,lpc3220-pwm", }, { /* sentinel */ } @@ -152,7 +143,6 @@ static struct platform_driver lpc32xx_pwm_driver = { .of_match_table = lpc32xx_pwm_dt_ids, }, .probe = lpc32xx_pwm_probe, - .remove = lpc32xx_pwm_remove, }; module_platform_driver(lpc32xx_pwm_driver); diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index b4a31060bcd7..0d4dd80e9f07 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -253,13 +253,11 @@ static int pwm_mediatek_probe(struct platform_device *pdev) } } - platform_set_drvdata(pdev, pc); - pc->chip.dev = &pdev->dev; pc->chip.ops = &pwm_mediatek_ops; pc->chip.npwm = pc->soc->num_pwms; - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); return ret; @@ -268,13 +266,6 @@ static int pwm_mediatek_probe(struct platform_device *pdev) return 0; } -static int pwm_mediatek_remove(struct platform_device *pdev) -{ - struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev); - - return pwmchip_remove(&pc->chip); -} - static const struct pwm_mediatek_of_data mt2712_pwm_data = { .num_pwms = 8, .pwm45_fixup = false, @@ -335,7 +326,6 @@ static struct platform_driver pwm_mediatek_driver = { .of_match_table = pwm_mediatek_of_match, }, .probe = pwm_mediatek_probe, - .remove = pwm_mediatek_remove, }; module_platform_driver(pwm_mediatek_driver); diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c index 9b3ba401a3db..c605013e4114 100644 --- a/drivers/pwm/pwm-mtk-disp.c +++ b/drivers/pwm/pwm-mtk-disp.c @@ -5,6 +5,7 @@ * Author: YH Huang <yh.huang@mediatek.com> */ +#include <linux/bitfield.h> #include <linux/clk.h> #include <linux/err.h> #include <linux/io.h> @@ -47,6 +48,7 @@ struct mtk_disp_pwm { struct clk *clk_main; struct clk *clk_mm; void __iomem *base; + bool enabled; }; static inline struct mtk_disp_pwm *to_mtk_disp_pwm(struct pwm_chip *chip) @@ -66,14 +68,47 @@ static void mtk_disp_pwm_update_bits(struct mtk_disp_pwm *mdp, u32 offset, writel(value, address); } -static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) +static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) { struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); u32 clk_div, period, high_width, value; u64 div, rate; int err; + if (state->polarity != PWM_POLARITY_NORMAL) + return -EINVAL; + + if (!state->enabled) { + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, + 0x0); + + if (mdp->enabled) { + clk_disable_unprepare(mdp->clk_mm); + clk_disable_unprepare(mdp->clk_main); + } + + mdp->enabled = false; + return 0; + } + + if (!mdp->enabled) { + err = clk_prepare_enable(mdp->clk_main); + if (err < 0) { + dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n", + ERR_PTR(err)); + return err; + } + + err = clk_prepare_enable(mdp->clk_mm); + if (err < 0) { + dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n", + ERR_PTR(err)); + clk_disable_unprepare(mdp->clk_main); + return err; + } + } + /* * Find period, high_width and clk_div to suit duty_ns and period_ns. * Calculate proper div value to keep period value in the bound. @@ -85,29 +120,24 @@ static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * high_width = (PWM_CLK_RATE * duty_ns) / (10^9 * (clk_div + 1)) */ rate = clk_get_rate(mdp->clk_main); - clk_div = div_u64(rate * period_ns, NSEC_PER_SEC) >> + clk_div = mul_u64_u64_div_u64(state->period, rate, NSEC_PER_SEC) >> PWM_PERIOD_BIT_WIDTH; - if (clk_div > PWM_CLKDIV_MAX) + if (clk_div > PWM_CLKDIV_MAX) { + if (!mdp->enabled) { + clk_disable_unprepare(mdp->clk_mm); + clk_disable_unprepare(mdp->clk_main); + } return -EINVAL; + } div = NSEC_PER_SEC * (clk_div + 1); - period = div64_u64(rate * period_ns, div); + period = mul_u64_u64_div_u64(state->period, rate, div); if (period > 0) period--; - high_width = div64_u64(rate * duty_ns, div); + high_width = mul_u64_u64_div_u64(state->duty_cycle, rate, div); value = period | (high_width << PWM_HIGH_WIDTH_SHIFT); - err = clk_enable(mdp->clk_main); - if (err < 0) - return err; - - err = clk_enable(mdp->clk_mm); - if (err < 0) { - clk_disable(mdp->clk_main); - return err; - } - mtk_disp_pwm_update_bits(mdp, mdp->data->con0, PWM_CLKDIV_MASK, clk_div << PWM_CLKDIV_SHIFT); @@ -122,50 +152,70 @@ static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, mtk_disp_pwm_update_bits(mdp, mdp->data->commit, mdp->data->commit_mask, 0x0); + } else { + /* + * For MT2701, disable double buffer before writing register + * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH. + */ + mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug, + mdp->data->bls_debug_mask, + mdp->data->bls_debug_mask); + mtk_disp_pwm_update_bits(mdp, mdp->data->con0, + mdp->data->con0_sel, + mdp->data->con0_sel); } - clk_disable(mdp->clk_mm); - clk_disable(mdp->clk_main); + mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, + mdp->data->enable_mask); + mdp->enabled = true; return 0; } -static int mtk_disp_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static void mtk_disp_pwm_get_state(struct pwm_chip *chip, + struct pwm_device *pwm, + struct pwm_state *state) { struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); + u64 rate, period, high_width; + u32 clk_div, con0, con1; int err; - err = clk_enable(mdp->clk_main); - if (err < 0) - return err; - - err = clk_enable(mdp->clk_mm); + err = clk_prepare_enable(mdp->clk_main); if (err < 0) { - clk_disable(mdp->clk_main); - return err; + dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n", ERR_PTR(err)); + return; } - mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, - mdp->data->enable_mask); - - return 0; -} - -static void mtk_disp_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip); - - mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask, - 0x0); + err = clk_prepare_enable(mdp->clk_mm); + if (err < 0) { + dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n", ERR_PTR(err)); + clk_disable_unprepare(mdp->clk_main); + return; + } - clk_disable(mdp->clk_mm); - clk_disable(mdp->clk_main); + rate = clk_get_rate(mdp->clk_main); + con0 = readl(mdp->base + mdp->data->con0); + con1 = readl(mdp->base + mdp->data->con1); + state->enabled = !!(con0 & BIT(0)); + clk_div = FIELD_GET(PWM_CLKDIV_MASK, con0); + period = FIELD_GET(PWM_PERIOD_MASK, con1); + /* + * period has 12 bits, clk_div 11 and NSEC_PER_SEC has 30, + * so period * (clk_div + 1) * NSEC_PER_SEC doesn't overflow. + */ + state->period = DIV64_U64_ROUND_UP(period * (clk_div + 1) * NSEC_PER_SEC, rate); + high_width = FIELD_GET(PWM_HIGH_WIDTH_MASK, con1); + state->duty_cycle = DIV64_U64_ROUND_UP(high_width * (clk_div + 1) * NSEC_PER_SEC, + rate); + state->polarity = PWM_POLARITY_NORMAL; + clk_disable_unprepare(mdp->clk_mm); + clk_disable_unprepare(mdp->clk_main); } static const struct pwm_ops mtk_disp_pwm_ops = { - .config = mtk_disp_pwm_config, - .enable = mtk_disp_pwm_enable, - .disable = mtk_disp_pwm_disable, + .apply = mtk_disp_pwm_apply, + .get_state = mtk_disp_pwm_get_state, .owner = THIS_MODULE, }; @@ -192,58 +242,28 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev) if (IS_ERR(mdp->clk_mm)) return PTR_ERR(mdp->clk_mm); - ret = clk_prepare(mdp->clk_main); - if (ret < 0) - return ret; - - ret = clk_prepare(mdp->clk_mm); - if (ret < 0) - goto disable_clk_main; - mdp->chip.dev = &pdev->dev; mdp->chip.ops = &mtk_disp_pwm_ops; mdp->chip.npwm = 1; ret = pwmchip_add(&mdp->chip); if (ret < 0) { - dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); - goto disable_clk_mm; + dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret)); + return ret; } platform_set_drvdata(pdev, mdp); - /* - * For MT2701, disable double buffer before writing register - * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH. - */ - if (!mdp->data->has_commit) { - mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug, - mdp->data->bls_debug_mask, - mdp->data->bls_debug_mask); - mtk_disp_pwm_update_bits(mdp, mdp->data->con0, - mdp->data->con0_sel, - mdp->data->con0_sel); - } - return 0; - -disable_clk_mm: - clk_unprepare(mdp->clk_mm); -disable_clk_main: - clk_unprepare(mdp->clk_main); - return ret; } static int mtk_disp_pwm_remove(struct platform_device *pdev) { struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&mdp->chip); - clk_unprepare(mdp->clk_mm); - clk_unprepare(mdp->clk_main); + pwmchip_remove(&mdp->chip); - return ret; + return 0; } static const struct mtk_pwm_data mt2701_pwm_data = { diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index a22180803bd7..766dbc58dad8 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -145,30 +145,18 @@ static int mxs_pwm_probe(struct platform_device *pdev) return ret; } - ret = pwmchip_add(&mxs->chip); + /* FIXME: Only do this if the PWM isn't already running */ + ret = stmp_reset_block(mxs->base); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to reset PWM\n"); + + ret = devm_pwmchip_add(&pdev->dev, &mxs->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to add pwm chip %d\n", ret); return ret; } - platform_set_drvdata(pdev, mxs); - - ret = stmp_reset_block(mxs->base); - if (ret) - goto pwm_remove; - return 0; - -pwm_remove: - pwmchip_remove(&mxs->chip); - return ret; -} - -static int mxs_pwm_remove(struct platform_device *pdev) -{ - struct mxs_pwm_chip *mxs = platform_get_drvdata(pdev); - - return pwmchip_remove(&mxs->chip); } static const struct of_device_id mxs_pwm_dt_ids[] = { @@ -183,7 +171,6 @@ static struct platform_driver mxs_pwm_driver = { .of_match_table = mxs_pwm_dt_ids, }, .probe = mxs_pwm_probe, - .remove = mxs_pwm_remove, }; module_platform_driver(mxs_pwm_driver); diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c index 50c454c553c4..ab63b081df53 100644 --- a/drivers/pwm/pwm-ntxec.c +++ b/drivers/pwm/pwm-ntxec.c @@ -150,23 +150,12 @@ static int ntxec_pwm_probe(struct platform_device *pdev) priv->ec = ec; priv->dev = &pdev->dev; - platform_set_drvdata(pdev, priv); - chip = &priv->chip; chip->dev = &pdev->dev; chip->ops = &ntxec_pwm_ops; - chip->base = -1; chip->npwm = 1; - return pwmchip_add(chip); -} - -static int ntxec_pwm_remove(struct platform_device *pdev) -{ - struct ntxec_pwm *priv = platform_get_drvdata(pdev); - struct pwm_chip *chip = &priv->chip; - - return pwmchip_remove(chip); + return devm_pwmchip_add(&pdev->dev, chip); } static struct platform_driver ntxec_pwm_driver = { @@ -174,7 +163,6 @@ static struct platform_driver ntxec_pwm_driver = { .name = "ntxec-pwm", }, .probe = ntxec_pwm_probe, - .remove = ntxec_pwm_remove, }; module_platform_driver(ntxec_pwm_driver); diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 507a2d945b90..fa800fcf31d4 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -444,11 +444,8 @@ err_find_timer_pdev: static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&omap->chip); - if (ret) - return ret; + pwmchip_remove(&omap->chip); if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 42ed770b432c..c56001a790d0 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -601,11 +601,8 @@ static int pca9685_pwm_probe(struct i2c_client *client, static int pca9685_pwm_remove(struct i2c_client *client) { struct pca9685 *pca = i2c_get_clientdata(client); - int ret; - ret = pwmchip_remove(&pca->chip); - if (ret) - return ret; + pwmchip_remove(&pca->chip); if (!pm_runtime_enabled(&client->dev)) { /* Put chip in sleep state if runtime PM is disabled */ diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index e091a528e33c..a9efdcf839ae 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -195,32 +195,21 @@ static int pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->mmio_base)) return PTR_ERR(pc->mmio_base); - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); return ret; } - platform_set_drvdata(pdev, pc); return 0; } -static int pwm_remove(struct platform_device *pdev) -{ - struct pxa_pwm_chip *pc; - - pc = platform_get_drvdata(pdev); - - return pwmchip_remove(&pc->chip); -} - static struct platform_driver pwm_driver = { .driver = { .name = "pxa25x-pwm", .of_match_table = pwm_of_match, }, .probe = pwm_probe, - .remove = pwm_remove, .id_table = pwm_id_table, }; diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c index 043fc32e8be8..579a15240e0a 100644 --- a/drivers/pwm/pwm-raspberrypi-poe.c +++ b/drivers/pwm/pwm-raspberrypi-poe.c @@ -166,8 +166,6 @@ static int raspberrypi_pwm_probe(struct platform_device *pdev) rpipwm->chip.base = -1; rpipwm->chip.npwm = RASPBERRYPI_FIRMWARE_PWM_NUM; - platform_set_drvdata(pdev, rpipwm); - ret = raspberrypi_pwm_get_property(rpipwm->firmware, RPI_PWM_CUR_DUTY_REG, &rpipwm->duty_cycle); if (ret) { @@ -175,14 +173,7 @@ static int raspberrypi_pwm_probe(struct platform_device *pdev) return ret; } - return pwmchip_add(&rpipwm->chip); -} - -static int raspberrypi_pwm_remove(struct platform_device *pdev) -{ - struct raspberrypi_pwm *rpipwm = platform_get_drvdata(pdev); - - return pwmchip_remove(&rpipwm->chip); + return devm_pwmchip_add(dev, &rpipwm->chip); } static const struct of_device_id raspberrypi_pwm_of_match[] = { @@ -197,7 +188,6 @@ static struct platform_driver raspberrypi_pwm_driver = { .of_match_table = raspberrypi_pwm_of_match, }, .probe = raspberrypi_pwm_probe, - .remove = raspberrypi_pwm_remove, }; module_platform_driver(raspberrypi_pwm_driver); diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 9daca0c772c7..b437192380e2 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -241,13 +241,12 @@ static int rcar_pwm_probe(struct platform_device *pdev) static int rcar_pwm_remove(struct platform_device *pdev) { struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&rcar_pwm->chip); + pwmchip_remove(&rcar_pwm->chip); pm_runtime_disable(&pdev->dev); - return ret; + return 0; } static const struct of_device_id rcar_pwm_of_table[] = { diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index b853e7942605..4381df90a527 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -425,13 +425,12 @@ static int tpu_probe(struct platform_device *pdev) static int tpu_remove(struct platform_device *pdev) { struct tpu_device *tpu = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&tpu->chip); + pwmchip_remove(&tpu->chip); pm_runtime_disable(&pdev->dev); - return ret; + return 0; } #ifdef CONFIG_OF diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index cbe900877724..f3647b317152 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -384,24 +384,12 @@ static int rockchip_pwm_remove(struct platform_device *pdev) { struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev); - /* - * Disable the PWM clk before unpreparing it if the PWM device is still - * running. This should only happen when the last PWM user left it - * enabled, or when nobody requested a PWM that was previously enabled - * by the bootloader. - * - * FIXME: Maybe the core should disable all PWM devices in - * pwmchip_remove(). In this case we'd only have to call - * clk_unprepare() after pwmchip_remove(). - * - */ - if (pwm_is_enabled(pc->chip.pwms)) - clk_disable(pc->clk); + pwmchip_remove(&pc->chip); clk_unprepare(pc->pclk); clk_unprepare(pc->clk); - return pwmchip_remove(&pc->chip); + return 0; } static struct platform_driver rockchip_pwm_driver = { diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index f6c528f02d43..dd94c4312a0c 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -580,11 +580,8 @@ static int pwm_samsung_probe(struct platform_device *pdev) static int pwm_samsung_remove(struct platform_device *pdev) { struct samsung_pwm_chip *chip = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&chip->chip); - if (ret < 0) - return ret; + pwmchip_remove(&chip->chip); clk_disable_unprepare(chip->base_clk); diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index 420edc4aa94a..253c4a17d255 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -291,7 +291,7 @@ static int pwm_sifive_remove(struct platform_device *dev) struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev); bool is_enabled = false; struct pwm_device *pwm; - int ret, ch; + int ch; for (ch = 0; ch < ddata->chip.npwm; ch++) { pwm = &ddata->chip.pwms[ch]; @@ -304,10 +304,10 @@ static int pwm_sifive_remove(struct platform_device *dev) clk_disable(ddata->clk); clk_disable_unprepare(ddata->clk); - ret = pwmchip_remove(&ddata->chip); + pwmchip_remove(&ddata->chip); clk_notifier_unregister(ddata->clk, &ddata->notifier); - return ret; + return 0; } static const struct of_device_id pwm_sifive_of_match[] = { diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c index 7a69c1a0c060..589aeaaa6ac8 100644 --- a/drivers/pwm/pwm-sl28cpld.c +++ b/drivers/pwm/pwm-sl28cpld.c @@ -231,9 +231,7 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev) chip->ops = &sl28cpld_pwm_ops; chip->npwm = 1; - platform_set_drvdata(pdev, priv); - - ret = pwmchip_add(&priv->pwm_chip); + ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip); if (ret) { dev_err(&pdev->dev, "failed to add PWM chip (%pe)", ERR_PTR(ret)); @@ -243,13 +241,6 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev) return 0; } -static int sl28cpld_pwm_remove(struct platform_device *pdev) -{ - struct sl28cpld_pwm *priv = platform_get_drvdata(pdev); - - return pwmchip_remove(&priv->pwm_chip); -} - static const struct of_device_id sl28cpld_pwm_of_match[] = { { .compatible = "kontron,sl28cpld-pwm" }, {} @@ -258,7 +249,6 @@ MODULE_DEVICE_TABLE(of, sl28cpld_pwm_of_match); static struct platform_driver sl28cpld_pwm_driver = { .probe = sl28cpld_pwm_probe, - .remove = sl28cpld_pwm_remove, .driver = { .name = "sl28cpld-pwm", .of_match_table = sl28cpld_pwm_of_match, diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c index 93dd03618465..3115abb3f52a 100644 --- a/drivers/pwm/pwm-stm32-lp.c +++ b/drivers/pwm/pwm-stm32-lp.c @@ -209,7 +209,7 @@ static int stm32_pwm_lp_probe(struct platform_device *pdev) priv->chip.ops = &stm32_pwm_lp_ops; priv->chip.npwm = 1; - ret = pwmchip_add(&priv->chip); + ret = devm_pwmchip_add(&pdev->dev, &priv->chip); if (ret < 0) return ret; @@ -218,15 +218,6 @@ static int stm32_pwm_lp_probe(struct platform_device *pdev) return 0; } -static int stm32_pwm_lp_remove(struct platform_device *pdev) -{ - struct stm32_pwm_lp *priv = platform_get_drvdata(pdev); - - pwm_disable(&priv->chip.pwms[0]); - - return pwmchip_remove(&priv->chip); -} - static int __maybe_unused stm32_pwm_lp_suspend(struct device *dev) { struct stm32_pwm_lp *priv = dev_get_drvdata(dev); @@ -258,7 +249,6 @@ MODULE_DEVICE_TABLE(of, stm32_pwm_lp_of_match); static struct platform_driver stm32_pwm_lp_driver = { .probe = stm32_pwm_lp_probe, - .remove = stm32_pwm_lp_remove, .driver = { .name = "stm32-pwm-lp", .of_match_table = of_match_ptr(stm32_pwm_lp_of_match), diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index c952604e91f3..91ca67651abd 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -484,11 +484,8 @@ err_bus: static int sun4i_pwm_remove(struct platform_device *pdev) { struct sun4i_pwm_chip *pwm = platform_get_drvdata(pdev); - int ret; - ret = pwmchip_remove(&pwm->chip); - if (ret) - return ret; + pwmchip_remove(&pwm->chip); clk_disable_unprepare(pwm->bus_clk); reset_control_assert(pwm->rst); diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index 35eb19a5a0d1..4701f0c9b921 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -253,7 +253,7 @@ static int ecap_pwm_probe(struct platform_device *pdev) if (IS_ERR(pc->mmio_base)) return PTR_ERR(pc->mmio_base); - ret = pwmchip_add(&pc->chip); + ret = devm_pwmchip_add(&pdev->dev, &pc->chip); if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); return ret; @@ -267,11 +267,9 @@ static int ecap_pwm_probe(struct platform_device *pdev) static int ecap_pwm_remove(struct platform_device *pdev) { - struct ecap_pwm_chip *pc = platform_get_drvdata(pdev); - pm_runtime_disable(&pdev->dev); - return pwmchip_remove(&pc->chip); + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 17909fa53211..5b723a48c5f1 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -485,11 +485,13 @@ static int ehrpwm_pwm_remove(struct platform_device *pdev) { struct ehrpwm_pwm_chip *pc = platform_get_drvdata(pdev); + pwmchip_remove(&pc->chip); + clk_unprepare(pc->tbclk); pm_runtime_disable(&pdev->dev); - return pwmchip_remove(&pc->chip); + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index 6c8df5f4e87d..49d9f7a78012 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -276,7 +276,6 @@ static const struct pwm_ops twl6030_pwmled_ops = { static int twl_pwmled_probe(struct platform_device *pdev) { struct twl_pwmled_chip *twl; - int ret; twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL); if (!twl) @@ -294,20 +293,7 @@ static int twl_pwmled_probe(struct platform_device *pdev) mutex_init(&twl->mutex); - ret = pwmchip_add(&twl->chip); - if (ret < 0) - return ret; - - platform_set_drvdata(pdev, twl); - - return 0; -} - -static int twl_pwmled_remove(struct platform_device *pdev) -{ - struct twl_pwmled_chip *twl = platform_get_drvdata(pdev); - - return pwmchip_remove(&twl->chip); + return devm_pwmchip_add(&pdev->dev, &twl->chip); } #ifdef CONFIG_OF @@ -325,7 +311,6 @@ static struct platform_driver twl_pwmled_driver = { .of_match_table = of_match_ptr(twl_pwmled_of_match), }, .probe = twl_pwmled_probe, - .remove = twl_pwmled_remove, }; module_platform_driver(twl_pwmled_driver); diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index e83a826bf621..203194f2c92e 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -298,7 +298,6 @@ static const struct pwm_ops twl6030_pwm_ops = { static int twl_pwm_probe(struct platform_device *pdev) { struct twl_pwm_chip *twl; - int ret; twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL); if (!twl) @@ -314,20 +313,7 @@ static int twl_pwm_probe(struct platform_device *pdev) mutex_init(&twl->mutex); - ret = pwmchip_add(&twl->chip); - if (ret < 0) - return ret; - - platform_set_drvdata(pdev, twl); - - return 0; -} - -static int twl_pwm_remove(struct platform_device *pdev) -{ - struct twl_pwm_chip *twl = platform_get_drvdata(pdev); - - return pwmchip_remove(&twl->chip); + return devm_pwmchip_add(&pdev->dev, &twl->chip); } #ifdef CONFIG_OF @@ -345,7 +331,6 @@ static struct platform_driver twl_pwm_driver = { .of_match_table = of_match_ptr(twl_pwm_of_match), }, .probe = twl_pwm_probe, - .remove = twl_pwm_remove, }; module_platform_driver(twl_pwm_driver); diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c index 1d78b455cc48..e34face736f4 100644 --- a/drivers/regulator/max14577-regulator.c +++ b/drivers/regulator/max14577-regulator.c @@ -269,5 +269,3 @@ module_exit(max14577_regulator_exit); MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>"); MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:max14577-regulator"); -MODULE_ALIAS("platform:max77836-regulator"); diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index 6cca910a76de..7f458d510483 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -991,7 +991,7 @@ static const struct rpmh_vreg_init_data pm8009_1_vreg_data[] = { RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l4"), RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l6"), RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l5-l6"), - RPMH_VREG("ldo7", "ldo%s6", &pmic5_pldo_lv, "vdd-l7"), + RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l7"), {} }; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 12153d5801ce..e1bc5214494e 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -75,6 +75,15 @@ config RTC_DEBUG Say yes here to enable debugging support in the RTC framework and individual RTC drivers. +config RTC_LIB_KUNIT_TEST + tristate "KUnit test for RTC lib functions" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test RTC library functions. + + If unsure, say N. + config RTC_NVMEM bool "RTC non volatile storage support" select NVMEM @@ -624,6 +633,7 @@ config RTC_DRV_FM3130 config RTC_DRV_RX8010 tristate "Epson RX8010SJ" + select REGMAP_I2C help If you say yes here you get support for the Epson RX8010SJ RTC chip. diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 2dd0dd956b0e..5ceeafe4d5b2 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -15,6 +15,8 @@ rtc-core-$(CONFIG_RTC_INTF_DEV) += dev.o rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o +obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o + # Keep the list ordered. obj-$(CONFIG_RTC_DRV_88PM80X) += rtc-88pm80x.o diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c index 23284580df97..fe361652727a 100644 --- a/drivers/rtc/lib.c +++ b/drivers/rtc/lib.c @@ -6,6 +6,8 @@ * Author: Alessandro Zummo <a.zummo@towertech.it> * * based on arch/arm/common/rtctime.c and other bits + * + * Author: Cassio Neri <cassio.neri@gmail.com> (rtc_time64_to_tm) */ #include <linux/export.h> @@ -22,8 +24,6 @@ static const unsigned short rtc_ydays[2][13] = { { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } }; -#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400) - /* * The number of days in the month. */ @@ -42,42 +42,95 @@ int rtc_year_days(unsigned int day, unsigned int month, unsigned int year) } EXPORT_SYMBOL(rtc_year_days); -/* - * rtc_time64_to_tm - Converts time64_t to rtc_time. - * Convert seconds since 01-01-1970 00:00:00 to Gregorian date. +/** + * rtc_time64_to_tm - converts time64_t to rtc_time. + * + * @time: The number of seconds since 01-01-1970 00:00:00. + * (Must be positive.) + * @tm: Pointer to the struct rtc_time. */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - unsigned int month, year, secs; + unsigned int secs; int days; + u64 u64tmp; + u32 u32tmp, udays, century, day_of_century, year_of_century, year, + day_of_year, month, day; + bool is_Jan_or_Feb, is_leap_year; + /* time must be positive */ days = div_s64_rem(time, 86400, &secs); /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; - year = 1970 + days / 365; - days -= (year - 1970) * 365 - + LEAPS_THRU_END_OF(year - 1) - - LEAPS_THRU_END_OF(1970 - 1); - while (days < 0) { - year -= 1; - days += 365 + is_leap_year(year); - } - tm->tm_year = year - 1900; - tm->tm_yday = days + 1; - - for (month = 0; month < 11; month++) { - int newdays; - - newdays = days - rtc_month_days(month, year); - if (newdays < 0) - break; - days = newdays; - } - tm->tm_mon = month; - tm->tm_mday = days + 1; + /* + * The following algorithm is, basically, Proposition 6.3 of Neri + * and Schneider [1]. In a few words: it works on the computational + * (fictitious) calendar where the year starts in March, month = 2 + * (*), and finishes in February, month = 13. This calendar is + * mathematically convenient because the day of the year does not + * depend on whether the year is leap or not. For instance: + * + * March 1st 0-th day of the year; + * ... + * April 1st 31-st day of the year; + * ... + * January 1st 306-th day of the year; (Important!) + * ... + * February 28th 364-th day of the year; + * February 29th 365-th day of the year (if it exists). + * + * After having worked out the date in the computational calendar + * (using just arithmetics) it's easy to convert it to the + * corresponding date in the Gregorian calendar. + * + * [1] "Euclidean Affine Functions and Applications to Calendar + * Algorithms". https://arxiv.org/abs/2102.06959 + * + * (*) The numbering of months follows rtc_time more closely and + * thus, is slightly different from [1]. + */ + + udays = ((u32) days) + 719468; + + u32tmp = 4 * udays + 3; + century = u32tmp / 146097; + day_of_century = u32tmp % 146097 / 4; + + u32tmp = 4 * day_of_century + 3; + u64tmp = 2939745ULL * u32tmp; + year_of_century = upper_32_bits(u64tmp); + day_of_year = lower_32_bits(u64tmp) / 2939745 / 4; + + year = 100 * century + year_of_century; + is_leap_year = year_of_century != 0 ? + year_of_century % 4 == 0 : century % 4 == 0; + + u32tmp = 2141 * day_of_year + 132377; + month = u32tmp >> 16; + day = ((u16) u32tmp) / 2141; + + /* + * Recall that January 01 is the 306-th day of the year in the + * computational (not Gregorian) calendar. + */ + is_Jan_or_Feb = day_of_year >= 306; + + /* Converts to the Gregorian calendar. */ + year = year + is_Jan_or_Feb; + month = is_Jan_or_Feb ? month - 12 : month; + day = day + 1; + + day_of_year = is_Jan_or_Feb ? + day_of_year - 306 : day_of_year + 31 + 28 + is_leap_year; + + /* Converts to rtc_time's format. */ + tm->tm_year = (int) (year - 1900); + tm->tm_mon = (int) month; + tm->tm_mday = (int) day; + tm->tm_yday = (int) day_of_year + 1; tm->tm_hour = secs / 3600; secs -= tm->tm_hour * 3600; diff --git a/drivers/rtc/lib_test.c b/drivers/rtc/lib_test.c new file mode 100644 index 000000000000..d5caf36c56cd --- /dev/null +++ b/drivers/rtc/lib_test.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: LGPL-2.1+ + +#include <kunit/test.h> +#include <linux/rtc.h> + +/* + * Advance a date by one day. + */ +static void advance_date(int *year, int *month, int *mday, int *yday) +{ + if (*mday != rtc_month_days(*month - 1, *year)) { + ++*mday; + ++*yday; + return; + } + + *mday = 1; + if (*month != 12) { + ++*month; + ++*yday; + return; + } + + *month = 1; + *yday = 1; + ++*year; +} + +/* + * Checks every day in a 160000 years interval starting on 1970-01-01 + * against the expected result. + */ +static void rtc_time64_to_tm_test_date_range(struct kunit *test) +{ + /* + * 160000 years = (160000 / 400) * 400 years + * = (160000 / 400) * 146097 days + * = (160000 / 400) * 146097 * 86400 seconds + */ + time64_t total_secs = ((time64_t) 160000) / 400 * 146097 * 86400; + + int year = 1970; + int month = 1; + int mday = 1; + int yday = 1; + + struct rtc_time result; + time64_t secs; + s64 days; + + for (secs = 0; secs <= total_secs; secs += 86400) { + + rtc_time64_to_tm(secs, &result); + + days = div_s64(secs, 86400); + + #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ + year, month, mday, yday, days + + KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); + KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG); + KUNIT_ASSERT_EQ_MSG(test, mday, result.tm_mday, FAIL_MSG); + KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG); + + advance_date(&year, &month, &mday, &yday); + } +} + +static struct kunit_case rtc_lib_test_cases[] = { + KUNIT_CASE(rtc_time64_to_tm_test_date_range), + {} +}; + +static struct kunit_suite rtc_lib_test_suite = { + .name = "rtc_lib_test_cases", + .test_cases = rtc_lib_test_cases, +}; + +kunit_test_suite(rtc_lib_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 670fd8a2970e..4eb53412b808 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) if (!pm_trace_rtc_valid()) return -EIO; - /* REVISIT: if the clock has a "century" register, use - * that instead of the heuristic in mc146818_get_time(). - * That'll make Y3K compatility (year > 2070) easy! - */ mc146818_get_time(t); return 0; } static int cmos_set_time(struct device *dev, struct rtc_time *t) { - /* REVISIT: set the "century" register if available - * - * NOTE: this ignores the issue whereby updating the seconds + /* NOTE: this ignores the issue whereby updating the seconds * takes effect exactly 500ms after we write the register. * (Also queueing and other delays before we get this far.) */ @@ -1053,7 +1047,9 @@ static void cmos_check_wkalrm(struct device *dev) * ACK the rtc irq here */ if (t_now >= cmos->alarm_expires && cmos_use_acpi_alarm()) { + local_irq_disable(); cmos_interrupt(0, (void *)cmos->rtc); + local_irq_enable(); return; } diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index c914091819ba..d38aaf08108c 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -60,14 +60,23 @@ #define RX8025_ADJ_DATA_MAX 62 #define RX8025_ADJ_DATA_MIN -62 +enum rx_model { + model_rx_unknown, + model_rx_8025, + model_rx_8035, + model_last +}; + static const struct i2c_device_id rx8025_id[] = { - { "rx8025", 0 }, + { "rx8025", model_rx_8025 }, + { "rx8035", model_rx_8035 }, { } }; MODULE_DEVICE_TABLE(i2c, rx8025_id); struct rx8025_data { struct rtc_device *rtc; + enum rx_model model; u8 ctrl1; }; @@ -100,10 +109,26 @@ static s32 rx8025_write_regs(const struct i2c_client *client, length, values); } +static int rx8025_is_osc_stopped(enum rx_model model, int ctrl2) +{ + int xstp = ctrl2 & RX8025_BIT_CTRL2_XST; + /* XSTP bit has different polarity on RX-8025 vs RX-8035. + * RX-8025: 0 == oscillator stopped + * RX-8035: 1 == oscillator stopped + */ + + if (model == model_rx_8025) + xstp = !xstp; + + return xstp; +} + static int rx8025_check_validity(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + struct rx8025_data *drvdata = dev_get_drvdata(dev); int ctrl2; + int xstp; ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2); if (ctrl2 < 0) @@ -117,7 +142,8 @@ static int rx8025_check_validity(struct device *dev) return -EINVAL; } - if (!(ctrl2 & RX8025_BIT_CTRL2_XST)) { + xstp = rx8025_is_osc_stopped(drvdata->model, ctrl2); + if (xstp) { dev_warn(dev, "crystal stopped, date is invalid\n"); return -EINVAL; } @@ -127,6 +153,7 @@ static int rx8025_check_validity(struct device *dev) static int rx8025_reset_validity(struct i2c_client *client) { + struct rx8025_data *drvdata = i2c_get_clientdata(client); int ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2); if (ctrl2 < 0) @@ -134,22 +161,28 @@ static int rx8025_reset_validity(struct i2c_client *client) ctrl2 &= ~(RX8025_BIT_CTRL2_PON | RX8025_BIT_CTRL2_VDET); + if (drvdata->model == model_rx_8025) + ctrl2 |= RX8025_BIT_CTRL2_XST; + else + ctrl2 &= ~(RX8025_BIT_CTRL2_XST); + return rx8025_write_reg(client, RX8025_REG_CTRL2, - ctrl2 | RX8025_BIT_CTRL2_XST); + ctrl2); } static irqreturn_t rx8025_handle_irq(int irq, void *dev_id) { struct i2c_client *client = dev_id; struct rx8025_data *rx8025 = i2c_get_clientdata(client); - int status; + int status, xstp; rtc_lock(rx8025->rtc); status = rx8025_read_reg(client, RX8025_REG_CTRL2); if (status < 0) goto out; - if (!(status & RX8025_BIT_CTRL2_XST)) + xstp = rx8025_is_osc_stopped(rx8025->model, status); + if (xstp) dev_warn(&client->dev, "Oscillation stop was detected," "you may have to readjust the clock\n"); @@ -519,6 +552,9 @@ static int rx8025_probe(struct i2c_client *client, i2c_set_clientdata(client, rx8025); + if (id) + rx8025->model = id->driver_data; + err = rx8025_init_client(client); if (err) return err; diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index 6b56f8eacba6..fb9c6b709e13 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -204,15 +204,9 @@ static int s5m8767_tm_to_data(struct rtc_time *tm, u8 *data) data[RTC_WEEKDAY] = 1 << tm->tm_wday; data[RTC_DATE] = tm->tm_mday; data[RTC_MONTH] = tm->tm_mon + 1; - data[RTC_YEAR1] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0; + data[RTC_YEAR1] = tm->tm_year - 100; - if (tm->tm_year < 100) { - pr_err("RTC cannot handle the year %d\n", - 1900 + tm->tm_year); - return -EINVAL; - } else { - return 0; - } + return 0; } /* @@ -786,29 +780,35 @@ static int s5m_rtc_probe(struct platform_device *pdev) if (ret) return ret; - device_init_wakeup(&pdev->dev, 1); - - info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc", - &s5m_rtc_ops, THIS_MODULE); - + info->rtc_dev = devm_rtc_allocate_device(&pdev->dev); if (IS_ERR(info->rtc_dev)) return PTR_ERR(info->rtc_dev); - if (!info->irq) { - dev_info(&pdev->dev, "Alarm IRQ not available\n"); - return 0; + info->rtc_dev->ops = &s5m_rtc_ops; + + if (info->device_type == S5M8763X) { + info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_0000; + info->rtc_dev->range_max = RTC_TIMESTAMP_END_9999; + } else { + info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_2000; + info->rtc_dev->range_max = RTC_TIMESTAMP_END_2099; } - ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, - s5m_rtc_alarm_irq, 0, "rtc-alarm0", - info); - if (ret < 0) { - dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", - info->irq, ret); - return ret; + if (!info->irq) { + clear_bit(RTC_FEATURE_ALARM, info->rtc_dev->features); + } else { + ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL, + s5m_rtc_alarm_irq, 0, "rtc-alarm0", + info); + if (ret < 0) { + dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n", + info->irq, ret); + return ret; + } + device_init_wakeup(&pdev->dev, 1); } - return 0; + return devm_rtc_register_device(info->rtc_dev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c index bc89c62ccb9b..75e4c2d777b9 100644 --- a/drivers/rtc/rtc-tps65910.c +++ b/drivers/rtc/rtc-tps65910.c @@ -467,6 +467,6 @@ static struct platform_driver tps65910_rtc_driver = { }; module_platform_driver(tps65910_rtc_driver); -MODULE_ALIAS("platform:rtc-tps65910"); +MODULE_ALIAS("platform:tps65910-rtc"); MODULE_AUTHOR("Venu Byravarasu <vbyravarasu@nvidia.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 2f3515fa242a..f3d5c7f4c13d 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -45,13 +45,14 @@ static void __init sclp_early_facilities_detect(void) sclp.has_gisaf = !!(sccb->fac118 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); sclp.has_kss = !!(sccb->fac98 & 0x01); - sclp.has_sipl = !!(sccb->cbl & 0x4000); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; if (sccb->cpuoff > 134) sclp.has_diag318 = !!(sccb->byte_134 & 0x80); + if (sccb->cpuoff > 137) + sclp.has_sipl = !!(sccb->cbl & 0x4000); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f433428057d9..d9b804943d19 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -213,7 +213,6 @@ static inline int ap_fetch_qci_info(struct ap_config_info *info) * ap_init_qci_info(): Allocate and query qci config info. * Does also update the static variables ap_max_domain_id * and ap_max_adapter_id if this info is available. - */ static void __init ap_init_qci_info(void) { @@ -439,6 +438,7 @@ static enum hrtimer_restart ap_poll_timeout(struct hrtimer *unused) /** * ap_interrupt_handler() - Schedule ap_tasklet on interrupt * @airq: pointer to adapter interrupt descriptor + * @floating: ignored */ static void ap_interrupt_handler(struct airq_struct *airq, bool floating) { @@ -1786,6 +1786,7 @@ static inline void ap_scan_adapter(int ap) /** * ap_scan_bus(): Scan the AP bus for new devices * Runs periodically, workqueue timer (ap_config_time) + * @unused: Unused pointer. */ static void ap_scan_bus(struct work_struct *unused) { diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index d70c4d3d0907..9ea48bf0ee40 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -20,7 +20,7 @@ static void __ap_flush_queue(struct ap_queue *aq); /** * ap_queue_enable_irq(): Enable interrupt support on this AP queue. - * @qid: The AP queue number + * @aq: The AP queue * @ind: the notification indicator byte * * Enables interruption on AP queue via ap_aqic(). Based on the return @@ -311,7 +311,7 @@ static enum ap_sm_wait ap_sm_read_write(struct ap_queue *aq) /** * ap_sm_reset(): Reset an AP queue. - * @qid: The AP queue number + * @aq: The AP queue * * Submit the Reset command to an AP queue. */ diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 540861ca2ba3..553b6b9d0222 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -600,6 +600,12 @@ static int rockchip_spi_transfer_one( int ret; bool use_dma; + /* Zero length transfers won't trigger an interrupt on completion */ + if (!xfer->len) { + spi_finalize_current_transfer(ctlr); + return 1; + } + WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) && (readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY)); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index ebd27f883033..8ce840c7ecc8 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -204,9 +204,6 @@ struct tegra_slink_data { struct dma_async_tx_descriptor *tx_dma_desc; }; -static int tegra_slink_runtime_suspend(struct device *dev); -static int tegra_slink_runtime_resume(struct device *dev); - static inline u32 tegra_slink_readl(struct tegra_slink_data *tspi, unsigned long reg) { @@ -1185,6 +1182,7 @@ static int tegra_slink_resume(struct device *dev) } #endif +#ifdef CONFIG_PM static int tegra_slink_runtime_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); @@ -1210,6 +1208,7 @@ static int tegra_slink_runtime_resume(struct device *dev) } return 0; } +#endif /* CONFIG_PM */ static const struct dev_pm_ops slink_pm_ops = { SET_RUNTIME_PM_OPS(tegra_slink_runtime_suspend, diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 823354a1a91a..19926beeb3b7 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -108,9 +108,12 @@ static struct attribute *imok_attr[] = { NULL }; +static const struct attribute_group imok_attribute_group = { + .attrs = imok_attr, +}; + static const struct attribute_group data_attribute_group = { .bin_attrs = data_attributes, - .attrs = imok_attr, }; static ssize_t available_uuids_show(struct device *dev, @@ -522,6 +525,12 @@ static int int3400_thermal_probe(struct platform_device *pdev) if (result) goto free_rel_misc; + if (acpi_has_method(priv->adev->handle, "IMOK")) { + result = sysfs_create_group(&pdev->dev.kobj, &imok_attribute_group); + if (result) + goto free_imok; + } + if (priv->data_vault) { result = sysfs_create_group(&pdev->dev.kobj, &data_attribute_group); @@ -545,6 +554,8 @@ free_sysfs: } free_uuid: sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); +free_imok: + sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group); free_rel_misc: if (!priv->rel_misc_dev_res) acpi_thermal_rel_misc_device_remove(priv->adev->handle); @@ -573,6 +584,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) if (priv->data_vault) sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); + sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group); thermal_zone_device_unregister(priv->thermal); kfree(priv->data_vault); kfree(priv->trts); diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index b0eb5ece9243..a5b58ea89cc6 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -528,7 +528,7 @@ static int start_power_clamp(void) set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); /* prevent cpu hotplug */ - get_online_cpus(); + cpus_read_lock(); /* prefer BSP */ control_cpu = 0; @@ -542,7 +542,7 @@ static int start_power_clamp(void) for_each_online_cpu(cpu) { start_power_clamp_worker(cpu); } - put_online_cpus(); + cpus_read_unlock(); return 0; } diff --git a/drivers/thermal/intel/intel_tcc_cooling.c b/drivers/thermal/intel/intel_tcc_cooling.c index 8ec10d55d421..cd80c7db4073 100644 --- a/drivers/thermal/intel/intel_tcc_cooling.c +++ b/drivers/thermal/intel/intel_tcc_cooling.c @@ -79,6 +79,8 @@ static const struct x86_cpu_id tcc_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL), X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL), {} }; diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig index 8d5ac2df26dc..7d942f71e532 100644 --- a/drivers/thermal/qcom/Kconfig +++ b/drivers/thermal/qcom/Kconfig @@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM trip points. The temperature reported by the thermal sensor reflects the real time die temperature if an ADC is present or an estimate of the temperature based upon the over temperature stage value. + +config QCOM_LMH + tristate "Qualcomm Limits Management Hardware" + depends on ARCH_QCOM + help + This enables initialization of Qualcomm limits management + hardware(LMh). LMh allows for hardware-enforced mitigation for cpus based on + input from temperature and current sensors. On many newer Qualcomm SoCs + LMh is configured in the firmware and this feature need not be enabled. + However, on certain SoCs like sdm845 LMh has to be configured from kernel. diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile index 252ea7d9da0b..0fa2512042e7 100644 --- a/drivers/thermal/qcom/Makefile +++ b/drivers/thermal/qcom/Makefile @@ -5,3 +5,4 @@ qcom_tsens-y += tsens.o tsens-v2.o tsens-v1.o tsens-v0_1.o \ tsens-8960.o obj-$(CONFIG_QCOM_SPMI_ADC_TM5) += qcom-spmi-adc-tm5.o obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o +obj-$(CONFIG_QCOM_LMH) += lmh.o diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c new file mode 100644 index 000000000000..eafa7526eb8b --- /dev/null +++ b/drivers/thermal/qcom/lmh.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (C) 2021, Linaro Limited. All rights reserved. + */ +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <linux/err.h> +#include <linux/platform_device.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/qcom_scm.h> + +#define LMH_NODE_DCVS 0x44435653 +#define LMH_CLUSTER0_NODE_ID 0x6370302D +#define LMH_CLUSTER1_NODE_ID 0x6370312D + +#define LMH_SUB_FN_THERMAL 0x54484D4C +#define LMH_SUB_FN_CRNT 0x43524E54 +#define LMH_SUB_FN_REL 0x52454C00 +#define LMH_SUB_FN_BCL 0x42434C00 + +#define LMH_ALGO_MODE_ENABLE 0x454E424C +#define LMH_TH_HI_THRESHOLD 0x48494748 +#define LMH_TH_LOW_THRESHOLD 0x4C4F5700 +#define LMH_TH_ARM_THRESHOLD 0x41524D00 + +#define LMH_REG_DCVS_INTR_CLR 0x8 + +struct lmh_hw_data { + void __iomem *base; + struct irq_domain *domain; + int irq; +}; + +static irqreturn_t lmh_handle_irq(int hw_irq, void *data) +{ + struct lmh_hw_data *lmh_data = data; + int irq = irq_find_mapping(lmh_data->domain, 0); + + /* Call the cpufreq driver to handle the interrupt */ + if (irq) + generic_handle_irq(irq); + + return 0; +} + +static void lmh_enable_interrupt(struct irq_data *d) +{ + struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d); + + /* Clear the existing interrupt */ + writel(0xff, lmh_data->base + LMH_REG_DCVS_INTR_CLR); + enable_irq(lmh_data->irq); +} + +static void lmh_disable_interrupt(struct irq_data *d) +{ + struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d); + + disable_irq_nosync(lmh_data->irq); +} + +static struct irq_chip lmh_irq_chip = { + .name = "lmh", + .irq_enable = lmh_enable_interrupt, + .irq_disable = lmh_disable_interrupt +}; + +static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) +{ + struct lmh_hw_data *lmh_data = d->host_data; + + irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq); + irq_set_chip_data(irq, lmh_data); + + return 0; +} + +static const struct irq_domain_ops lmh_irq_ops = { + .map = lmh_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static int lmh_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct device_node *cpu_node; + struct lmh_hw_data *lmh_data; + int temp_low, temp_high, temp_arm, cpu_id, ret; + u32 node_id; + + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL); + if (!lmh_data) + return -ENOMEM; + + lmh_data->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(lmh_data->base)) + return PTR_ERR(lmh_data->base); + + cpu_node = of_parse_phandle(np, "cpus", 0); + if (!cpu_node) + return -EINVAL; + cpu_id = of_cpu_node_to_id(cpu_node); + of_node_put(cpu_node); + + ret = of_property_read_u32(np, "qcom,lmh-temp-high-millicelsius", &temp_high); + if (ret) { + dev_err(dev, "missing qcom,lmh-temp-high-millicelsius property\n"); + return ret; + } + + ret = of_property_read_u32(np, "qcom,lmh-temp-low-millicelsius", &temp_low); + if (ret) { + dev_err(dev, "missing qcom,lmh-temp-low-millicelsius property\n"); + return ret; + } + + ret = of_property_read_u32(np, "qcom,lmh-temp-arm-millicelsius", &temp_arm); + if (ret) { + dev_err(dev, "missing qcom,lmh-temp-arm-millicelsius property\n"); + return ret; + } + + /* + * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed + * for other platforms, revisit this to check if the <cpu-id, node-id> should be part + * of a dt match table. + */ + if (cpu_id == 0) { + node_id = LMH_CLUSTER0_NODE_ID; + } else if (cpu_id == 4) { + node_id = LMH_CLUSTER1_NODE_ID; + } else { + dev_err(dev, "Wrong CPU id associated with LMh node\n"); + return -EINVAL; + } + + if (!qcom_scm_lmh_dcvsh_available()) + return -EINVAL; + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1, + LMH_NODE_DCVS, node_id, 0); + if (ret) + dev_err(dev, "Error %d enabling current subfunction\n", ret); + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1, + LMH_NODE_DCVS, node_id, 0); + if (ret) + dev_err(dev, "Error %d enabling reliability subfunction\n", ret); + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1, + LMH_NODE_DCVS, node_id, 0); + if (ret) + dev_err(dev, "Error %d enabling BCL subfunction\n", ret); + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1, + LMH_NODE_DCVS, node_id, 0); + if (ret) { + dev_err(dev, "Error %d enabling thermal subfunction\n", ret); + return ret; + } + + ret = qcom_scm_lmh_profile_change(0x1); + if (ret) { + dev_err(dev, "Error %d changing profile\n", ret); + return ret; + } + + /* Set default thermal trips */ + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, temp_arm, + LMH_NODE_DCVS, node_id, 0); + if (ret) { + dev_err(dev, "Error setting thermal ARM threshold%d\n", ret); + return ret; + } + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, temp_high, + LMH_NODE_DCVS, node_id, 0); + if (ret) { + dev_err(dev, "Error setting thermal HI threshold%d\n", ret); + return ret; + } + + ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, temp_low, + LMH_NODE_DCVS, node_id, 0); + if (ret) { + dev_err(dev, "Error setting thermal ARM threshold%d\n", ret); + return ret; + } + + lmh_data->irq = platform_get_irq(pdev, 0); + lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data); + if (!lmh_data->domain) { + dev_err(dev, "Error adding irq_domain\n"); + return -EINVAL; + } + + /* Disable the irq and let cpufreq enable it when ready to handle the interrupt */ + irq_set_status_flags(lmh_data->irq, IRQ_NOAUTOEN); + ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq, + IRQF_ONESHOT | IRQF_NO_SUSPEND, + "lmh-irq", lmh_data); + if (ret) { + dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq); + irq_domain_remove(lmh_data->domain); + return ret; + } + + return 0; +} + +static const struct of_device_id lmh_table[] = { + { .compatible = "qcom,sdm845-lmh", }, + {} +}; +MODULE_DEVICE_TABLE(of, lmh_table); + +static struct platform_driver lmh_driver = { + .probe = lmh_probe, + .driver = { + .name = "qcom-lmh", + .of_match_table = lmh_table, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(lmh_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("QCOM LMh driver"); diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index 232fd0b33325..8494cc04aa21 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -359,6 +359,12 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm) &adc_tm->channels[i], &adc_tm5_ops); if (IS_ERR(tzd)) { + if (PTR_ERR(tzd) == -ENODEV) { + dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n", + adc_tm->channels[i].channel); + continue; + } + dev_err(adc_tm->dev, "Error registering TZ zone for channel %d: %ld\n", adc_tm->channels[i].channel, PTR_ERR(tzd)); return PTR_ERR(tzd); diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c index fdf16aa34eb4..85228d308dd3 100644 --- a/drivers/thermal/rcar_gen3_thermal.c +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -84,7 +84,7 @@ struct rcar_gen3_thermal_tsc { struct thermal_zone_device *zone; struct equation_coefs coef; int tj_t; - int id; /* thermal channel id */ + unsigned int id; /* thermal channel id */ }; struct rcar_gen3_thermal_priv { @@ -190,10 +190,64 @@ static int rcar_gen3_thermal_get_temp(void *devdata, int *temp) return 0; } -static const struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = { +static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc, + int mcelsius) +{ + int celsius, val; + + celsius = DIV_ROUND_CLOSEST(mcelsius, 1000); + if (celsius <= INT_FIXPT(tsc->tj_t)) + val = celsius * tsc->coef.a1 + tsc->coef.b1; + else + val = celsius * tsc->coef.a2 + tsc->coef.b2; + + return INT_FIXPT(val); +} + +static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high) +{ + struct rcar_gen3_thermal_tsc *tsc = devdata; + u32 irqmsk = 0; + + if (low != -INT_MAX) { + irqmsk |= IRQ_TEMPD1; + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP1, + rcar_gen3_thermal_mcelsius_to_temp(tsc, low)); + } + + if (high != INT_MAX) { + irqmsk |= IRQ_TEMP2; + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2, + rcar_gen3_thermal_mcelsius_to_temp(tsc, high)); + } + + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, irqmsk); + + return 0; +} + +static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = { .get_temp = rcar_gen3_thermal_get_temp, + .set_trips = rcar_gen3_thermal_set_trips, }; +static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data) +{ + struct rcar_gen3_thermal_priv *priv = data; + unsigned int i; + u32 status; + + for (i = 0; i < priv->num_tscs; i++) { + status = rcar_gen3_thermal_read(priv->tscs[i], REG_GEN3_IRQSTR); + rcar_gen3_thermal_write(priv->tscs[i], REG_GEN3_IRQSTR, 0); + if (status) + thermal_zone_device_update(priv->tscs[i]->zone, + THERMAL_EVENT_UNSPECIFIED); + } + + return IRQ_HANDLED; +} + static const struct soc_device_attribute r8a7795es1[] = { { .soc_id = "r8a7795", .revision = "ES1.*" }, { /* sentinel */ } @@ -210,6 +264,9 @@ static void rcar_gen3_thermal_init_r8a7795es1(struct rcar_gen3_thermal_tsc *tsc) rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F); rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0); + if (tsc->zone->ops->set_trips) + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, + IRQ_TEMPD1 | IRQ_TEMP2); rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN); @@ -235,6 +292,9 @@ static void rcar_gen3_thermal_init(struct rcar_gen3_thermal_tsc *tsc) rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0); rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0); + if (tsc->zone->ops->set_trips) + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN, + IRQ_TEMPD1 | IRQ_TEMP2); reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR); reg_val |= THCTR_THSST; @@ -303,6 +363,34 @@ static void rcar_gen3_hwmon_action(void *data) thermal_remove_hwmon_sysfs(zone); } +static int rcar_gen3_thermal_request_irqs(struct rcar_gen3_thermal_priv *priv, + struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned int i; + char *irqname; + int ret, irq; + + for (i = 0; i < 2; i++) { + irq = platform_get_irq_optional(pdev, i); + if (irq < 0) + return irq; + + irqname = devm_kasprintf(dev, GFP_KERNEL, "%s:ch%d", + dev_name(dev), i); + if (!irqname) + return -ENOMEM; + + ret = devm_request_threaded_irq(dev, irq, NULL, + rcar_gen3_thermal_irq, + IRQF_ONESHOT, irqname, priv); + if (ret) + return ret; + } + + return 0; +} + static int rcar_gen3_thermal_probe(struct platform_device *pdev) { struct rcar_gen3_thermal_priv *priv; @@ -310,7 +398,8 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) const int *ths_tj_1 = of_device_get_match_data(dev); struct resource *res; struct thermal_zone_device *zone; - int ret, i; + unsigned int i; + int ret; /* default values if FUSEs are missing */ /* TODO: Read values from hardware on supported platforms */ @@ -326,6 +415,9 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); + if (rcar_gen3_thermal_request_irqs(priv, pdev)) + rcar_gen3_tz_of_ops.set_trips = NULL; + pm_runtime_enable(dev); pm_runtime_get_sync(dev); @@ -351,9 +443,6 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) priv->tscs[i] = tsc; - priv->thermal_init(tsc); - rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1); - zone = devm_thermal_zone_of_sensor_register(dev, i, tsc, &rcar_gen3_tz_of_ops); if (IS_ERR(zone)) { @@ -363,6 +452,9 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) } tsc->zone = zone; + priv->thermal_init(tsc); + rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1); + tsc->zone->tzp->no_hwmon = false; ret = thermal_add_hwmon_sysfs(tsc->zone); if (ret) @@ -376,7 +468,7 @@ static int rcar_gen3_thermal_probe(struct platform_device *pdev) if (ret < 0) goto error_unregister; - dev_info(dev, "TSC%d: Loaded %d trip points\n", i, ret); + dev_info(dev, "TSC%u: Loaded %d trip points\n", i, ret); } priv->num_tscs = i; @@ -401,8 +493,12 @@ static int __maybe_unused rcar_gen3_thermal_resume(struct device *dev) for (i = 0; i < priv->num_tscs; i++) { struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i]; + struct thermal_zone_device *zone = tsc->zone; priv->thermal_init(tsc); + if (zone->ops->set_trips) + rcar_gen3_thermal_set_trips(tsc, zone->prev_low_trip, + zone->prev_high_trip); } return 0; diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index e9a90bc23b11..f4ab4c5b4b62 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1073,6 +1073,7 @@ static int exynos_tmu_probe(struct platform_device *pdev) data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk"); if (IS_ERR(data->sclk)) { dev_err(&pdev->dev, "Failed to get sclk\n"); + ret = PTR_ERR(data->sclk); goto err_clk; } else { ret = clk_prepare_enable(data->sclk); diff --git a/drivers/thermal/tegra/Kconfig b/drivers/thermal/tegra/Kconfig index 46c2215867cd..cfa41d87a794 100644 --- a/drivers/thermal/tegra/Kconfig +++ b/drivers/thermal/tegra/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "NVIDIA Tegra thermal drivers" -depends on ARCH_TEGRA +depends on ARCH_TEGRA || COMPILE_TEST config TEGRA_SOCTHERM tristate "Tegra SOCTHERM thermal management" @@ -18,4 +18,11 @@ config TEGRA_BPMP_THERMAL Enable this option for support for sensing system temperature of NVIDIA Tegra systems-on-chip with the BPMP coprocessor (Tegra186). +config TEGRA30_TSENSOR + tristate "Tegra30 Thermal Sensor" + depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST + help + Enable this option to support thermal management of NVIDIA Tegra30 + system-on-chip. + endmenu diff --git a/drivers/thermal/tegra/Makefile b/drivers/thermal/tegra/Makefile index 0f2b66edf0d2..eb27d194c583 100644 --- a/drivers/thermal/tegra/Makefile +++ b/drivers/thermal/tegra/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_TEGRA_SOCTHERM) += tegra-soctherm.o obj-$(CONFIG_TEGRA_BPMP_THERMAL) += tegra-bpmp-thermal.o +obj-$(CONFIG_TEGRA30_TSENSOR) += tegra30-tsensor.o tegra-soctherm-y := soctherm.o soctherm-fuse.o tegra-soctherm-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124-soctherm.o diff --git a/drivers/thermal/tegra/soctherm.c b/drivers/thermal/tegra/soctherm.c index 8e303e9d1dc0..210325f92559 100644 --- a/drivers/thermal/tegra/soctherm.c +++ b/drivers/thermal/tegra/soctherm.c @@ -450,8 +450,8 @@ static int enforce_temp_range(struct device *dev, int trip_temp) temp = clamp_val(trip_temp, min_low_temp, max_high_temp); if (temp != trip_temp) - dev_info(dev, "soctherm: trip temperature %d forced to %d\n", - trip_temp, temp); + dev_dbg(dev, "soctherm: trip temperature %d forced to %d\n", + trip_temp, temp); return temp; } diff --git a/drivers/thermal/tegra/tegra30-tsensor.c b/drivers/thermal/tegra/tegra30-tsensor.c new file mode 100644 index 000000000000..9b6b693cbcf8 --- /dev/null +++ b/drivers/thermal/tegra/tegra30-tsensor.c @@ -0,0 +1,673 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tegra30 SoC Thermal Sensor driver + * + * Based on downstream HWMON driver from NVIDIA. + * Copyright (C) 2011 NVIDIA Corporation + * + * Author: Dmitry Osipenko <digetx@gmail.com> + * Copyright (C) 2021 GRATE-DRIVER project + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/math.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/reset.h> +#include <linux/slab.h> +#include <linux/thermal.h> +#include <linux/types.h> + +#include <soc/tegra/fuse.h> + +#include "../thermal_core.h" +#include "../thermal_hwmon.h" + +#define TSENSOR_SENSOR0_CONFIG0 0x0 +#define TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP BIT(0) +#define TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN BIT(1) +#define TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN BIT(2) +#define TSENSOR_SENSOR0_CONFIG0_DVFS_EN BIT(3) +#define TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN BIT(4) +#define TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN BIT(5) +#define TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN BIT(6) +#define TSENSOR_SENSOR0_CONFIG0_M GENMASK(23, 8) +#define TSENSOR_SENSOR0_CONFIG0_N GENMASK(31, 24) + +#define TSENSOR_SENSOR0_CONFIG1 0x8 +#define TSENSOR_SENSOR0_CONFIG1_TH1 GENMASK(15, 0) +#define TSENSOR_SENSOR0_CONFIG1_TH2 GENMASK(31, 16) + +#define TSENSOR_SENSOR0_CONFIG2 0xc +#define TSENSOR_SENSOR0_CONFIG2_TH3 GENMASK(15, 0) + +#define TSENSOR_SENSOR0_STATUS0 0x18 +#define TSENSOR_SENSOR0_STATUS0_STATE GENMASK(2, 0) +#define TSENSOR_SENSOR0_STATUS0_INTR BIT(8) +#define TSENSOR_SENSOR0_STATUS0_CURRENT_VALID BIT(9) + +#define TSENSOR_SENSOR0_TS_STATUS1 0x1c +#define TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT GENMASK(31, 16) + +#define TEGRA30_FUSE_TEST_PROG_VER 0x28 + +#define TEGRA30_FUSE_TSENSOR_CALIB 0x98 +#define TEGRA30_FUSE_TSENSOR_CALIB_LOW GENMASK(15, 0) +#define TEGRA30_FUSE_TSENSOR_CALIB_HIGH GENMASK(31, 16) + +#define TEGRA30_FUSE_SPARE_BIT 0x144 + +struct tegra_tsensor; + +struct tegra_tsensor_calibration_data { + int a, b, m, n, p, r; +}; + +struct tegra_tsensor_channel { + void __iomem *regs; + unsigned int id; + struct tegra_tsensor *ts; + struct thermal_zone_device *tzd; +}; + +struct tegra_tsensor { + void __iomem *regs; + bool swap_channels; + struct clk *clk; + struct device *dev; + struct reset_control *rst; + struct tegra_tsensor_channel ch[2]; + struct tegra_tsensor_calibration_data calib; +}; + +static int tegra_tsensor_hw_enable(const struct tegra_tsensor *ts) +{ + u32 val; + int err; + + err = reset_control_assert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to assert hardware reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(ts->clk); + if (err) { + dev_err(ts->dev, "failed to enable clock: %d\n", err); + return err; + } + + fsleep(1000); + + err = reset_control_deassert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to deassert hardware reset: %d\n", err); + goto disable_clk; + } + + /* + * Sensors are enabled after reset by default, but not gauging + * until clock counter is programmed. + * + * M: number of reference clock pulses after which every + * temperature / voltage measurement is made + * + * N: number of reference clock counts for which the counter runs + */ + val = FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_M, 12500); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_N, 255); + + /* apply the same configuration to both channels */ + writel_relaxed(val, ts->regs + 0x40 + TSENSOR_SENSOR0_CONFIG0); + writel_relaxed(val, ts->regs + 0x80 + TSENSOR_SENSOR0_CONFIG0); + + return 0; + +disable_clk: + clk_disable_unprepare(ts->clk); + + return err; +} + +static int tegra_tsensor_hw_disable(const struct tegra_tsensor *ts) +{ + int err; + + err = reset_control_assert(ts->rst); + if (err) { + dev_err(ts->dev, "failed to assert hardware reset: %d\n", err); + return err; + } + + clk_disable_unprepare(ts->clk); + + return 0; +} + +static void devm_tegra_tsensor_hw_disable(void *data) +{ + const struct tegra_tsensor *ts = data; + + tegra_tsensor_hw_disable(ts); +} + +static int tegra_tsensor_get_temp(void *data, int *temp) +{ + const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor *ts = tsc->ts; + int err, c1, c2, c3, c4, counter; + u32 val; + + /* + * Counter will be invalid if hardware is misprogrammed or not enough + * time passed since the time when sensor was enabled. + */ + err = readl_relaxed_poll_timeout(tsc->regs + TSENSOR_SENSOR0_STATUS0, val, + val & TSENSOR_SENSOR0_STATUS0_CURRENT_VALID, + 21 * USEC_PER_MSEC, + 21 * USEC_PER_MSEC * 50); + if (err) { + dev_err_once(ts->dev, "ch%u: counter invalid\n", tsc->id); + return err; + } + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_TS_STATUS1); + counter = FIELD_GET(TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT, val); + + /* + * This shouldn't happen with a valid counter status, nevertheless + * lets verify the value since it's in a separate (from status) + * register. + */ + if (counter == 0xffff) { + dev_err_once(ts->dev, "ch%u: counter overflow\n", tsc->id); + return -EINVAL; + } + + /* + * temperature = a * counter + b + * temperature = m * (temperature ^ 2) + n * temperature + p + */ + c1 = DIV_ROUND_CLOSEST(ts->calib.a * counter + ts->calib.b, 1000000); + c1 = c1 ?: 1; + c2 = DIV_ROUND_CLOSEST(ts->calib.p, c1); + c3 = c1 * ts->calib.m; + c4 = ts->calib.n; + + *temp = DIV_ROUND_CLOSEST(c1 * (c2 + c3 + c4), 1000); + + return 0; +} + +static int tegra_tsensor_temp_to_counter(const struct tegra_tsensor *ts, int temp) +{ + int c1, c2; + + c1 = DIV_ROUND_CLOSEST(ts->calib.p - temp * 1000, ts->calib.m); + c2 = -ts->calib.r - int_sqrt(ts->calib.r * ts->calib.r - c1); + + return DIV_ROUND_CLOSEST(c2 * 1000000 - ts->calib.b, ts->calib.a); +} + +static int tegra_tsensor_set_trips(void *data, int low, int high) +{ + const struct tegra_tsensor_channel *tsc = data; + const struct tegra_tsensor *ts = tsc->ts; + u32 val; + + /* + * TSENSOR doesn't trigger interrupt on the "low" temperature breach, + * hence bail out if high temperature is unspecified. + */ + if (high == INT_MAX) + return 0; + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1); + val &= ~TSENSOR_SENSOR0_CONFIG1_TH1; + + high = tegra_tsensor_temp_to_counter(ts, high); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH1, high); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1); + + return 0; +} + +static const struct thermal_zone_of_device_ops ops = { + .get_temp = tegra_tsensor_get_temp, + .set_trips = tegra_tsensor_set_trips, +}; + +static bool +tegra_tsensor_handle_channel_interrupt(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + u32 val; + + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_STATUS0); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_STATUS0); + + if (FIELD_GET(TSENSOR_SENSOR0_STATUS0_STATE, val) == 5) + dev_err_ratelimited(ts->dev, "ch%u: counter overflowed\n", id); + + if (!FIELD_GET(TSENSOR_SENSOR0_STATUS0_INTR, val)) + return false; + + thermal_zone_device_update(tsc->tzd, THERMAL_EVENT_UNSPECIFIED); + + return true; +} + +static irqreturn_t tegra_tsensor_isr(int irq, void *data) +{ + const struct tegra_tsensor *ts = data; + bool handled = false; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) + handled |= tegra_tsensor_handle_channel_interrupt(ts, i); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +static int tegra_tsensor_disable_hw_channel(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + struct thermal_zone_device *tzd = tsc->tzd; + u32 val; + int err; + + if (!tzd) + goto stop_channel; + + err = thermal_zone_device_disable(tzd); + if (err) { + dev_err(ts->dev, "ch%u: failed to disable zone: %d\n", id, err); + return err; + } + +stop_channel: + /* stop channel gracefully */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP, 1); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + return 0; +} + +static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd, + int *hot_trip, int *crit_trip) +{ + unsigned int i; + + /* + * 90C is the maximal critical temperature of all Tegra30 SoC variants, + * use it for the default trip if unspecified in a device-tree. + */ + *hot_trip = 85000; + *crit_trip = 90000; + + for (i = 0; i < tzd->trips; i++) { + enum thermal_trip_type type; + int trip_temp; + + tzd->ops->get_trip_temp(tzd, i, &trip_temp); + tzd->ops->get_trip_type(tzd, i, &type); + + if (type == THERMAL_TRIP_HOT) + *hot_trip = trip_temp; + + if (type == THERMAL_TRIP_CRITICAL) + *crit_trip = trip_temp; + } + + /* clamp hardware trips to the calibration limits */ + *hot_trip = clamp(*hot_trip, 25000, 90000); + + /* + * Kernel will perform a normal system shut down if it will + * see that critical temperature is breached, hence set the + * hardware limit by 5C higher in order to allow system to + * shut down gracefully before sending signal to the Power + * Management controller. + */ + *crit_trip = clamp(*crit_trip + 5000, 25000, 90000); +} + +static int tegra_tsensor_enable_hw_channel(const struct tegra_tsensor *ts, + unsigned int id) +{ + const struct tegra_tsensor_channel *tsc = &ts->ch[id]; + struct thermal_zone_device *tzd = tsc->tzd; + int err, hot_trip = 0, crit_trip = 0; + u32 val; + + if (!tzd) { + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP; + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + return 0; + } + + tegra_tsensor_get_hw_channel_trips(tzd, &hot_trip, &crit_trip); + + /* prevent potential racing with tegra_tsensor_set_trips() */ + mutex_lock(&tzd->lock); + + dev_info_once(ts->dev, "ch%u: PMC emergency shutdown trip set to %dC\n", + id, DIV_ROUND_CLOSEST(crit_trip, 1000)); + + hot_trip = tegra_tsensor_temp_to_counter(ts, hot_trip); + crit_trip = tegra_tsensor_temp_to_counter(ts, crit_trip); + + /* program LEVEL2 counter threshold */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1); + val &= ~TSENSOR_SENSOR0_CONFIG1_TH2; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH2, hot_trip); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1); + + /* program LEVEL3 counter threshold */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG2); + val &= ~TSENSOR_SENSOR0_CONFIG2_TH3; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG2_TH3, crit_trip); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG2); + + /* + * Enable sensor, emergency shutdown, interrupts for level 1/2/3 + * breaches and counter overflow condition. + * + * Disable DIV2 throttle for now since we need to figure out how + * to integrate it properly with the thermal framework. + * + * Thermal levels supported by hardware: + * + * Level 0 = cold + * Level 1 = passive cooling (cpufreq DVFS) + * Level 2 = passive cooling assisted by hardware (DIV2) + * Level 3 = emergency shutdown assisted by hardware (PMC) + */ + val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0); + val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP; + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_DVFS_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN, 0); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN, 1); + val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN, 1); + writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0); + + mutex_unlock(&tzd->lock); + + err = thermal_zone_device_enable(tzd); + if (err) { + dev_err(ts->dev, "ch%u: failed to enable zone: %d\n", id, err); + return err; + } + + return 0; +} + +static bool tegra_tsensor_fuse_read_spare(unsigned int spare) +{ + u32 val = 0; + + tegra_fuse_readl(TEGRA30_FUSE_SPARE_BIT + spare * 4, &val); + + return !!val; +} + +static int tegra_tsensor_nvmem_setup(struct tegra_tsensor *ts) +{ + u32 i, ate_ver = 0, cal = 0, t1_25C = 0, t2_90C = 0; + int err, c1_25C, c2_90C; + + err = tegra_fuse_readl(TEGRA30_FUSE_TEST_PROG_VER, &ate_ver); + if (err) { + dev_err_probe(ts->dev, err, "failed to get ATE version\n"); + return err; + } + + if (ate_ver < 8) { + dev_info(ts->dev, "unsupported ATE version: %u\n", ate_ver); + return -ENODEV; + } + + /* + * We have two TSENSOR channels in a two different spots on SoC. + * Second channel provides more accurate data on older SoC versions, + * use it as a primary channel. + */ + if (ate_ver <= 21) { + dev_info_once(ts->dev, + "older ATE version detected, channels remapped\n"); + ts->swap_channels = true; + } + + err = tegra_fuse_readl(TEGRA30_FUSE_TSENSOR_CALIB, &cal); + if (err) { + dev_err(ts->dev, "failed to get calibration data: %d\n", err); + return err; + } + + /* get calibrated counter values for 25C/90C thresholds */ + c1_25C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_LOW, cal); + c2_90C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_HIGH, cal); + + /* and calibrated temperatures corresponding to the counter values */ + for (i = 0; i < 7; i++) { + t1_25C |= tegra_tsensor_fuse_read_spare(14 + i) << i; + t1_25C |= tegra_tsensor_fuse_read_spare(21 + i) << i; + + t2_90C |= tegra_tsensor_fuse_read_spare(0 + i) << i; + t2_90C |= tegra_tsensor_fuse_read_spare(7 + i) << i; + } + + if (c2_90C - c1_25C <= t2_90C - t1_25C) { + dev_err(ts->dev, "invalid calibration data: %d %d %u %u\n", + c2_90C, c1_25C, t2_90C, t1_25C); + return -EINVAL; + } + + /* all calibration coefficients are premultiplied by 1000000 */ + + ts->calib.a = DIV_ROUND_CLOSEST((t2_90C - t1_25C) * 1000000, + (c2_90C - c1_25C)); + + ts->calib.b = t1_25C * 1000000 - ts->calib.a * c1_25C; + + if (tegra_sku_info.revision == TEGRA_REVISION_A01) { + ts->calib.m = -2775; + ts->calib.n = 1338811; + ts->calib.p = -7300000; + } else { + ts->calib.m = -3512; + ts->calib.n = 1528943; + ts->calib.p = -11100000; + } + + /* except the coefficient of a reduced quadratic equation */ + ts->calib.r = DIV_ROUND_CLOSEST(ts->calib.n, ts->calib.m * 2); + + dev_info_once(ts->dev, + "calibration: %d %d %u %u ATE ver: %u SoC rev: %u\n", + c2_90C, c1_25C, t2_90C, t1_25C, ate_ver, + tegra_sku_info.revision); + + return 0; +} + +static int tegra_tsensor_register_channel(struct tegra_tsensor *ts, + unsigned int id) +{ + struct tegra_tsensor_channel *tsc = &ts->ch[id]; + unsigned int hw_id = ts->swap_channels ? !id : id; + + tsc->ts = ts; + tsc->id = id; + tsc->regs = ts->regs + 0x40 * (hw_id + 1); + + tsc->tzd = devm_thermal_zone_of_sensor_register(ts->dev, id, tsc, &ops); + if (IS_ERR(tsc->tzd)) { + if (PTR_ERR(tsc->tzd) != -ENODEV) + return dev_err_probe(ts->dev, PTR_ERR(tsc->tzd), + "failed to register thermal zone\n"); + + /* + * It's okay if sensor isn't assigned to any thermal zone + * in a device-tree. + */ + tsc->tzd = NULL; + return 0; + } + + if (devm_thermal_add_hwmon_sysfs(tsc->tzd)) + dev_warn(ts->dev, "failed to add hwmon sysfs attributes\n"); + + return 0; +} + +static int tegra_tsensor_probe(struct platform_device *pdev) +{ + struct tegra_tsensor *ts; + unsigned int i; + int err, irq; + + ts = devm_kzalloc(&pdev->dev, sizeof(*ts), GFP_KERNEL); + if (!ts) + return -ENOMEM; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ts->dev = &pdev->dev; + platform_set_drvdata(pdev, ts); + + ts->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ts->regs)) + return PTR_ERR(ts->regs); + + ts->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ts->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(ts->clk), + "failed to get clock\n"); + + ts->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); + if (IS_ERR(ts->rst)) + return dev_err_probe(&pdev->dev, PTR_ERR(ts->rst), + "failed to get reset control\n"); + + err = tegra_tsensor_nvmem_setup(ts); + if (err) + return err; + + err = tegra_tsensor_hw_enable(ts); + if (err) + return err; + + err = devm_add_action_or_reset(&pdev->dev, + devm_tegra_tsensor_hw_disable, + ts); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_register_channel(ts, i); + if (err) + return err; + } + + err = devm_request_threaded_irq(&pdev->dev, irq, NULL, + tegra_tsensor_isr, IRQF_ONESHOT, + "tegra_tsensor", ts); + if (err) + return dev_err_probe(&pdev->dev, err, + "failed to request interrupt\n"); + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_enable_hw_channel(ts, i); + if (err) + return err; + } + + return 0; +} + +static int __maybe_unused tegra_tsensor_suspend(struct device *dev) +{ + struct tegra_tsensor *ts = dev_get_drvdata(dev); + unsigned int i; + int err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_disable_hw_channel(ts, i); + if (err) + goto enable_channel; + } + + err = tegra_tsensor_hw_disable(ts); + if (err) + goto enable_channel; + + return 0; + +enable_channel: + while (i--) + tegra_tsensor_enable_hw_channel(ts, i); + + return err; +} + +static int __maybe_unused tegra_tsensor_resume(struct device *dev) +{ + struct tegra_tsensor *ts = dev_get_drvdata(dev); + unsigned int i; + int err; + + err = tegra_tsensor_hw_enable(ts); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(ts->ch); i++) { + err = tegra_tsensor_enable_hw_channel(ts, i); + if (err) + return err; + } + + return 0; +} + +static const struct dev_pm_ops tegra_tsensor_pm_ops = { + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_tsensor_suspend, + tegra_tsensor_resume) +}; + +static const struct of_device_id tegra_tsensor_of_match[] = { + { .compatible = "nvidia,tegra30-tsensor", }, + {}, +}; +MODULE_DEVICE_TABLE(of, tegra_tsensor_of_match); + +static struct platform_driver tegra_tsensor_driver = { + .probe = tegra_tsensor_probe, + .driver = { + .name = "tegra30-tsensor", + .of_match_table = tegra_tsensor_of_match, + .pm = &tegra_tsensor_pm_ops, + }, +}; +module_platform_driver(tegra_tsensor_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra30 Thermal Sensor driver"); +MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index a503c1b2bfd9..3d91982d8371 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -33,6 +33,16 @@ config VDPA_SIM_BLOCK vDPA block device simulator which terminates IO request in a memory buffer. +config VDPA_USER + tristate "VDUSE (vDPA Device in Userspace) support" + depends on EVENTFD && MMU && HAS_DMA + select DMA_OPS + select VHOST_IOTLB + select IOMMU_IOVA + help + With VDUSE it is possible to emulate a vDPA Device + in a userspace program. + config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI @@ -53,6 +63,7 @@ config MLX5_VDPA config MLX5_VDPA_NET tristate "vDPA driver for ConnectX devices" select MLX5_VDPA + select VHOST_RING depends on MLX5_CORE help VDPA network driver for ConnectX6 and newer. Provides offloading diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 67fe7f3d6943..f02ebed33f19 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ +obj-$(CONFIG_VDPA_USER) += vdpa_user/ obj-$(CONFIG_IFCVF) += ifcvf/ obj-$(CONFIG_MLX5_VDPA) += mlx5/ obj-$(CONFIG_VP_VDPA) += virtio_pci/ diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 6e197fe0fcf9..2808f1ba9f7b 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -158,7 +158,9 @@ next: return -EIO; } - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + hw->nr_vring = ifc_ioread16(&hw->common_cfg->num_queues); + + for (i = 0; i < hw->nr_vring; i++) { ifc_iowrite16(i, &hw->common_cfg->queue_select); notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); hw->vring[i].notify_addr = hw->notify_base + @@ -304,7 +306,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) u32 q_pair_id; ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + q_pair_id = qid / hw->nr_vring; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; last_avail_idx = ifc_ioread16(avail_idx_addr); @@ -318,7 +320,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) u32 q_pair_id; ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; - q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + q_pair_id = qid / hw->nr_vring; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; hw->vring[qid].last_avail_idx = num; ifc_iowrite16(num, avail_idx_addr); diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 2996db0da490..09918af3ecf8 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -22,17 +22,8 @@ #define N3000_DEVICE_ID 0x1041 #define N3000_SUBSYS_DEVICE_ID 0x001A -#define IFCVF_NET_SUPPORTED_FEATURES \ - ((1ULL << VIRTIO_NET_F_MAC) | \ - (1ULL << VIRTIO_F_ANY_LAYOUT) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VIRTIO_NET_F_STATUS) | \ - (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \ - (1ULL << VIRTIO_NET_F_MRG_RXBUF)) - -/* Only one queue pair for now. */ -#define IFCVF_MAX_QUEUE_PAIRS 1 +/* Max 8 data queue pairs(16 queues) and one control vq for now. */ +#define IFCVF_MAX_QUEUES 17 #define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE #define IFCVF_QUEUE_MAX 32768 @@ -51,8 +42,6 @@ #define ifcvf_private_to_vf(adapter) \ (&((struct ifcvf_adapter *)adapter)->vf) -#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1) - struct vring_info { u64 desc; u64 avail; @@ -83,7 +72,7 @@ struct ifcvf_hw { u32 dev_type; struct virtio_pci_common_cfg __iomem *common_cfg; void __iomem *net_cfg; - struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; + struct vring_info vring[IFCVF_MAX_QUEUES]; void __iomem * const *base; char config_msix_name[256]; struct vdpa_callback config_cb; @@ -103,7 +92,13 @@ struct ifcvf_vring_lm_cfg { struct ifcvf_lm_cfg { u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; - struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS]; + struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES]; +}; + +struct ifcvf_vdpa_mgmt_dev { + struct vdpa_mgmt_dev mdev; + struct ifcvf_adapter *adapter; + struct pci_dev *pdev; }; int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 351c6cfb24c3..dcd648e1f7e7 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -63,9 +63,13 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; int vector, i, ret, irq; + u16 max_intr; - ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, - IFCVF_MAX_INTR, PCI_IRQ_MSIX); + /* all queues and config interrupt */ + max_intr = vf->nr_vring + 1; + + ret = pci_alloc_irq_vectors(pdev, max_intr, + max_intr, PCI_IRQ_MSIX); if (ret < 0) { IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n"); return ret; @@ -83,7 +87,7 @@ static int ifcvf_request_irq(struct ifcvf_adapter *adapter) return ret; } - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + for (i = 0; i < vf->nr_vring; i++) { snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", pci_name(pdev), i); vector = i + IFCVF_MSI_QUEUE_OFF; @@ -112,7 +116,6 @@ static int ifcvf_start_datapath(void *private) u8 status; int ret; - vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2; ret = ifcvf_start_hw(vf); if (ret < 0) { status = ifcvf_get_status(vf); @@ -128,7 +131,7 @@ static int ifcvf_stop_datapath(void *private) struct ifcvf_hw *vf = ifcvf_private_to_vf(private); int i; - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + for (i = 0; i < vf->nr_vring; i++) vf->vring[i].cb.callback = NULL; ifcvf_stop_hw(vf); @@ -141,7 +144,7 @@ static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter); int i; - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + for (i = 0; i < vf->nr_vring; i++) { vf->vring[i].last_avail_idx = 0; vf->vring[i].desc = 0; vf->vring[i].avail = 0; @@ -171,17 +174,12 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); struct pci_dev *pdev = adapter->pdev; - + u32 type = vf->dev_type; u64 features; - switch (vf->dev_type) { - case VIRTIO_ID_NET: - features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES; - break; - case VIRTIO_ID_BLOCK: + if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK) features = ifcvf_get_features(vf); - break; - default: + else { features = 0; IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type); } @@ -218,23 +216,12 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) int ret; vf = vdpa_to_vf(vdpa_dev); - adapter = dev_get_drvdata(vdpa_dev->dev.parent); + adapter = vdpa_to_adapter(vdpa_dev); status_old = ifcvf_get_status(vf); if (status_old == status) return; - if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && - !(status & VIRTIO_CONFIG_S_DRIVER_OK)) { - ifcvf_stop_datapath(adapter); - ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2); - } - - if (status == 0) { - ifcvf_reset_vring(adapter); - return; - } - if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { ret = ifcvf_request_irq(adapter); @@ -254,6 +241,29 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) ifcvf_set_status(vf, status); } +static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_adapter *adapter; + struct ifcvf_hw *vf; + u8 status_old; + + vf = vdpa_to_vf(vdpa_dev); + adapter = vdpa_to_adapter(vdpa_dev); + status_old = ifcvf_get_status(vf); + + if (status_old == 0) + return 0; + + if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) { + ifcvf_stop_datapath(adapter); + ifcvf_free_irq(adapter, vf->nr_vring); + } + + ifcvf_reset_vring(adapter); + + return 0; +} + static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) { return IFCVF_QUEUE_MAX; @@ -437,6 +447,7 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .set_features = ifcvf_vdpa_set_features, .get_status = ifcvf_vdpa_get_status, .set_status = ifcvf_vdpa_set_status, + .reset = ifcvf_vdpa_reset, .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, .get_vq_state = ifcvf_vdpa_get_vq_state, .set_vq_state = ifcvf_vdpa_set_vq_state, @@ -458,63 +469,63 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .get_vq_notification = ifcvf_get_vq_notification, }; -static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +static struct virtio_device_id id_table_net[] = { + {VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID}, + {0}, +}; + +static struct virtio_device_id id_table_blk[] = { + {VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID}, + {0}, +}; + +static u32 get_dev_type(struct pci_dev *pdev) { - struct device *dev = &pdev->dev; - struct ifcvf_adapter *adapter; - struct ifcvf_hw *vf; - int ret, i; + u32 dev_type; - ret = pcim_enable_device(pdev); - if (ret) { - IFCVF_ERR(pdev, "Failed to enable device\n"); - return ret; - } + /* This drirver drives both modern virtio devices and transitional + * devices in modern mode. + * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM, + * so legacy devices and transitional devices in legacy + * mode will not work for vDPA, this driver will not + * drive devices with legacy interface. + */ - ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), - IFCVF_DRIVER_NAME); - if (ret) { - IFCVF_ERR(pdev, "Failed to request MMIO region\n"); - return ret; - } + if (pdev->device < 0x1040) + dev_type = pdev->subsystem_device; + else + dev_type = pdev->device - 0x1040; - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, "No usable DMA configuration\n"); - return ret; - } + return dev_type; +} - ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); - if (ret) { - IFCVF_ERR(pdev, - "Failed for adding devres for freeing irq vectors\n"); - return ret; - } +static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +{ + struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; + struct ifcvf_adapter *adapter; + struct pci_dev *pdev; + struct ifcvf_hw *vf; + struct device *dev; + int ret, i; + ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev); + if (ifcvf_mgmt_dev->adapter) + return -EOPNOTSUPP; + + pdev = ifcvf_mgmt_dev->pdev; + dev = &pdev->dev; adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops, NULL); + dev, &ifc_vdpa_ops, name, false); if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return PTR_ERR(adapter); } - pci_set_master(pdev); - pci_set_drvdata(pdev, adapter); + ifcvf_mgmt_dev->adapter = adapter; + pci_set_drvdata(pdev, ifcvf_mgmt_dev); vf = &adapter->vf; - - /* This drirver drives both modern virtio devices and transitional - * devices in modern mode. - * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM, - * so legacy devices and transitional devices in legacy - * mode will not work for vDPA, this driver will not - * drive devices with legacy interface. - */ - if (pdev->device < 0x1040) - vf->dev_type = pdev->subsystem_device; - else - vf->dev_type = pdev->device - 0x1040; - + vf->dev_type = get_dev_type(pdev); vf->base = pcim_iomap_table(pdev); adapter->pdev = pdev; @@ -526,14 +537,15 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + for (i = 0; i < vf->nr_vring; i++) vf->vring[i].irq = -EINVAL; vf->hw_features = ifcvf_get_hw_features(vf); - ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2); + adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev; + ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring); if (ret) { - IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); + IFCVF_ERR(pdev, "Failed to register to vDPA bus"); goto err; } @@ -544,11 +556,100 @@ err: return ret; } +static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) +{ + struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; + + ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev); + _vdpa_unregister_device(dev); + ifcvf_mgmt_dev->adapter = NULL; +} + +static const struct vdpa_mgmtdev_ops ifcvf_vdpa_mgmt_dev_ops = { + .dev_add = ifcvf_vdpa_dev_add, + .dev_del = ifcvf_vdpa_dev_del +}; + +static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; + struct device *dev = &pdev->dev; + u32 dev_type; + int ret; + + ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL); + if (!ifcvf_mgmt_dev) { + IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n"); + return -ENOMEM; + } + + dev_type = get_dev_type(pdev); + switch (dev_type) { + case VIRTIO_ID_NET: + ifcvf_mgmt_dev->mdev.id_table = id_table_net; + break; + case VIRTIO_ID_BLOCK: + ifcvf_mgmt_dev->mdev.id_table = id_table_blk; + break; + default: + IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type); + ret = -EOPNOTSUPP; + goto err; + } + + ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops; + ifcvf_mgmt_dev->mdev.device = dev; + ifcvf_mgmt_dev->pdev = pdev; + + ret = pcim_enable_device(pdev); + if (ret) { + IFCVF_ERR(pdev, "Failed to enable device\n"); + goto err; + } + + ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), + IFCVF_DRIVER_NAME); + if (ret) { + IFCVF_ERR(pdev, "Failed to request MMIO region\n"); + goto err; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) { + IFCVF_ERR(pdev, "No usable DMA configuration\n"); + goto err; + } + + ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); + if (ret) { + IFCVF_ERR(pdev, + "Failed for adding devres for freeing irq vectors\n"); + goto err; + } + + pci_set_master(pdev); + + ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev); + if (ret) { + IFCVF_ERR(pdev, + "Failed to initialize the management interfaces\n"); + goto err; + } + + return 0; + +err: + kfree(ifcvf_mgmt_dev); + return ret; +} + static void ifcvf_remove(struct pci_dev *pdev) { - struct ifcvf_adapter *adapter = pci_get_drvdata(pdev); + struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; - vdpa_unregister_device(&adapter->vdpa); + ifcvf_mgmt_dev = pci_get_drvdata(pdev); + vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev); + kfree(ifcvf_mgmt_dev); } static struct pci_device_id ifcvf_pci_ids[] = { diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 0002b2136b48..01a848adf590 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -5,7 +5,7 @@ #define __MLX5_VDPA_H__ #include <linux/etherdevice.h> -#include <linux/if_vlan.h> +#include <linux/vringh.h> #include <linux/vdpa.h> #include <linux/mlx5/driver.h> @@ -48,6 +48,26 @@ struct mlx5_vdpa_resources { bool valid; }; +struct mlx5_control_vq { + struct vhost_iotlb *iotlb; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; + struct vringh vring; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + struct vdpa_callback event_cb; + struct vringh_kiov riov; + struct vringh_kiov wiov; + unsigned short head; +}; + +struct mlx5_ctrl_wq_ent { + struct work_struct work; + struct mlx5_vdpa_dev *mvdev; +}; + struct mlx5_vdpa_dev { struct vdpa_device vdev; struct mlx5_core_dev *mdev; @@ -57,9 +77,12 @@ struct mlx5_vdpa_dev { u64 actual_features; u8 status; u32 max_vqs; + u16 max_idx; u32 generation; struct mlx5_vdpa_mr mr; + struct mlx5_control_vq cvq; + struct workqueue_struct *wq; }; int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); @@ -68,6 +91,7 @@ int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn); void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn); int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn); void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn); diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index e59135fa867e..ff010c6d0cd3 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies Ltd. */ +#include <linux/vhost_types.h> #include <linux/vdpa.h> #include <linux/gcd.h> #include <linux/string.h> @@ -451,33 +452,30 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey); } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) { - struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct vhost_iotlb_map *map; + u64 start = 0, last = ULLONG_MAX; int err; - if (mr->initialized) - return 0; - - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); - - if (!err) - mr->initialized = true; + if (!src) { + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); + return err; + } - return err; + for (map = vhost_iotlb_itree_first(src, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, + map->addr, map->perm); + if (err) + return err; + } + return 0; } -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) { - int err; - - mutex_lock(&mvdev->mr.mkey_mtx); - err = _mlx5_vdpa_create_mr(mvdev, iotlb); - mutex_unlock(&mvdev->mr.mkey_mtx); - return err; + vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX); } static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) @@ -501,6 +499,7 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) if (!mr->initialized) goto out; + prune_iotlb(mvdev); if (mr->user_mr) destroy_user_mr(mvdev, mr); else @@ -512,6 +511,48 @@ out: mutex_unlock(&mr->mkey_mtx); } +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err; + + if (mr->initialized) + return 0; + + if (iotlb) + err = create_user_mr(mvdev, iotlb); + else + err = create_dma_mr(mvdev, mr); + + if (err) + return err; + + err = dup_iotlb(mvdev, iotlb); + if (err) + goto out_err; + + mr->initialized = true; + return 0; + +out_err: + if (iotlb) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); + + return err; +} + +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + int err; + + mutex_lock(&mvdev->mr.mkey_mtx); + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mvdev->mr.mkey_mtx); + return err; +} + int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map) { diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c index d4606213f88a..15e266d0e27a 100644 --- a/drivers/vdpa/mlx5/core/resources.c +++ b/drivers/vdpa/mlx5/core/resources.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies Ltd. */ +#include <linux/iova.h> #include <linux/mlx5/driver.h> #include "mlx5_vdpa.h" @@ -128,6 +129,16 @@ int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 * return err; } +int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + + MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid); + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); +} + void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; @@ -221,6 +232,22 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *m return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); } +static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev) +{ + mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0); + if (!mvdev->cvq.iotlb) + return -ENOMEM; + + vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock); + + return 0; +} + +static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev) +{ + vhost_iotlb_free(mvdev->cvq.iotlb); +} + int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) { u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset); @@ -260,10 +287,17 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) err = -ENOMEM; goto err_key; } + + err = init_ctrl_vq(mvdev); + if (err) + goto err_ctrl; + res->valid = true; return 0; +err_ctrl: + iounmap(res->kick_addr); err_key: dealloc_pd(mvdev, res->pdn, res->uid); err_pd: @@ -282,6 +316,7 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) if (!res->valid) return; + cleanup_ctrl_vq(mvdev); iounmap(res->kick_addr); res->kick_addr = NULL; dealloc_pd(mvdev, res->pdn, res->uid); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 5906cada2293..294ba05e6fc9 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -45,6 +45,8 @@ MODULE_LICENSE("Dual BSD/GPL"); (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) +#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) + struct mlx5_vdpa_net_resources { u32 tisn; u32 tdn; @@ -90,7 +92,6 @@ struct mlx5_vq_restore_info { u16 avail_index; u16 used_index; bool ready; - struct vdpa_callback cb; bool restore; }; @@ -100,7 +101,6 @@ struct mlx5_vdpa_virtqueue { u64 device_addr; u64 driver_addr; u32 num_ent; - struct vdpa_callback event_cb; /* Resources for implementing the notification channel from the device * to the driver. fwqp is the firmware end of an RC connection; the @@ -135,11 +135,20 @@ struct mlx5_vdpa_virtqueue { */ #define MLX5_MAX_SUPPORTED_VQS 16 +static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) +{ + if (unlikely(idx > mvdev->max_idx)) + return false; + + return true; +} + struct mlx5_vdpa_net { struct mlx5_vdpa_dev mvdev; struct mlx5_vdpa_net_resources res; struct virtio_net_config config; struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; + struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1]; /* Serialize vq resources creation and destruction. This is required * since memory map might change and we need to destroy and create @@ -151,15 +160,18 @@ struct mlx5_vdpa_net { struct mlx5_flow_handle *rx_rule; bool setup; u16 mtu; + u32 cur_num_vqs; }; static void free_resources(struct mlx5_vdpa_net *ndev); static void init_mvqs(struct mlx5_vdpa_net *ndev); -static int setup_driver(struct mlx5_vdpa_net *ndev); +static int setup_driver(struct mlx5_vdpa_dev *mvdev); static void teardown_driver(struct mlx5_vdpa_net *ndev); static bool mlx5_vdpa_debug; +#define MLX5_CVQ_MAX_ENT 16 + #define MLX5_LOG_VIO_FLAG(_feature) \ do { \ if (features & BIT_ULL(_feature)) \ @@ -172,11 +184,41 @@ static bool mlx5_vdpa_debug; mlx5_vdpa_info(mvdev, "%s\n", #_status); \ } while (0) +/* TODO: cross-endian support */ +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) +{ + return virtio_legacy_is_little_endian() || + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1)); +} + +static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val) +{ + return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val); +} + +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) +{ + return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val); +} + static inline u32 mlx5_vdpa_max_qps(int max_vqs) { return max_vqs / 2; } +static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev) +{ + if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) + return 2; + + return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); +} + +static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx) +{ + return idx == ctrl_vq_idx(mvdev); +} + static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) { if (status & ~VALID_STATUS_MASK) @@ -481,6 +523,10 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { + struct mlx5_vdpa_net *ndev = mvq->ndev; + struct vdpa_callback *event_cb; + + event_cb = &ndev->event_cbs[mvq->index]; mlx5_cq_set_ci(&mvq->cq.mcq); /* make sure CQ cosumer update is visible to the hardware before updating @@ -488,8 +534,8 @@ static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int nu */ dma_wmb(); rx_post(&mvq->vqqp, num); - if (mvq->event_cb.callback) - mvq->event_cb.callback(mvq->event_cb.private); + if (event_cb->callback) + event_cb->callback(event_cb->private); } static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) @@ -1100,10 +1146,8 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) if (!mvq->num_ent) return 0; - if (mvq->initialized) { - mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n"); - return -EINVAL; - } + if (mvq->initialized) + return 0; err = cq_create(ndev, idx, mvq->num_ent); if (err) @@ -1190,19 +1234,20 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue * static int create_rqt(struct mlx5_vdpa_net *ndev) { - int log_max_rqt; __be32 *list; + int max_rqt; void *rqtc; int inlen; void *in; int i, j; int err; - log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); - if (log_max_rqt < 1) + max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2, + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); + if (max_rqt < 1) return -EOPNOTSUPP; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num); + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num); in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -1211,10 +1256,9 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); - MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt); - MLX5_SET(rqtc, rqtc, rqt_actual_size, 1); + MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt); list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); - for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) { + for (i = 0, j = 0; j < max_rqt; j++) { if (!ndev->vqs[j].initialized) continue; @@ -1223,6 +1267,7 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) i++; } } + MLX5_SET(rqtc, rqtc, rqt_actual_size, i); err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); kfree(in); @@ -1232,6 +1277,52 @@ static int create_rqt(struct mlx5_vdpa_net *ndev) return 0; } +#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) + +static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) +{ + __be32 *list; + int max_rqt; + void *rqtc; + int inlen; + void *in; + int i, j; + int err; + + max_rqt = min_t(int, ndev->cur_num_vqs / 2, + 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); + if (max_rqt < 1) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); + MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); + + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); + for (i = 0, j = 0; j < num; j++) { + if (!ndev->vqs[j].initialized) + continue; + + if (!vq_is_tx(ndev->vqs[j].index)) { + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); + i++; + } + } + MLX5_SET(rqtc, rqtc, rqt_actual_size, i); + err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); + kfree(in); + if (err) + return err; + + return 0; +} + static void destroy_rqt(struct mlx5_vdpa_net *ndev) { mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); @@ -1345,12 +1436,206 @@ static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) ndev->rx_rule = NULL; } +static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_control_vq *cvq = &mvdev->cvq; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct mlx5_core_dev *pfmdev; + size_t read; + u8 mac[ETH_ALEN]; + + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + switch (cmd) { + case VIRTIO_NET_CTRL_MAC_ADDR_SET: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN); + if (read != ETH_ALEN) + break; + + if (!memcmp(ndev->config.mac, mac, 6)) { + status = VIRTIO_NET_OK; + break; + } + + if (!is_zero_ether_addr(ndev->config.mac)) { + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { + mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", + ndev->config.mac); + break; + } + } + + if (mlx5_mpfs_add_mac(pfmdev, mac)) { + mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n", + mac); + break; + } + + memcpy(ndev->config.mac, mac, ETH_ALEN); + status = VIRTIO_NET_OK; + break; + + default: + break; + } + + return status; +} + +static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int cur_qps = ndev->cur_num_vqs / 2; + int err; + int i; + + if (cur_qps > newqps) { + err = modify_rqt(ndev, 2 * newqps); + if (err) + return err; + + for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) + teardown_vq(ndev, &ndev->vqs[i]); + + ndev->cur_num_vqs = 2 * newqps; + } else { + ndev->cur_num_vqs = 2 * newqps; + for (i = cur_qps * 2; i < 2 * newqps; i++) { + err = setup_vq(ndev, &ndev->vqs[i]); + if (err) + goto clean_added; + } + err = modify_rqt(ndev, 2 * newqps); + if (err) + goto clean_added; + } + return 0; + +clean_added: + for (--i; i >= cur_qps; --i) + teardown_vq(ndev, &ndev->vqs[i]); + + return err; +} + +static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) +{ + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct mlx5_control_vq *cvq = &mvdev->cvq; + struct virtio_net_ctrl_mq mq; + size_t read; + u16 newqps; + + switch (cmd) { + case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); + if (read != sizeof(mq)) + break; + + newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); + if (ndev->cur_num_vqs == 2 * newqps) { + status = VIRTIO_NET_OK; + break; + } + + if (newqps & (newqps - 1)) + break; + + if (!change_num_qps(mvdev, newqps)) + status = VIRTIO_NET_OK; + + break; + default: + break; + } + + return status; +} + +static void mlx5_cvq_kick_handler(struct work_struct *work) +{ + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct virtio_net_ctrl_hdr ctrl; + struct mlx5_ctrl_wq_ent *wqent; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_control_vq *cvq; + struct mlx5_vdpa_net *ndev; + size_t read, write; + int err; + + wqent = container_of(work, struct mlx5_ctrl_wq_ent, work); + mvdev = wqent->mvdev; + ndev = to_mlx5_vdpa_ndev(mvdev); + cvq = &mvdev->cvq; + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) + goto out; + + if (!cvq->ready) + goto out; + + while (true) { + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, + GFP_ATOMIC); + if (err <= 0) + break; + + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); + if (read != sizeof(ctrl)) + break; + + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + status = handle_ctrl_mac(mvdev, ctrl.cmd); + break; + case VIRTIO_NET_CTRL_MQ: + status = handle_ctrl_mq(mvdev, ctrl.cmd); + break; + + default: + break; + } + + /* Make sure data is written before advancing index */ + smp_wmb(); + + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); + vringh_complete_iotlb(&cvq->vring, cvq->head, write); + vringh_kiov_cleanup(&cvq->riov); + vringh_kiov_cleanup(&cvq->wiov); + + if (vringh_need_notify_iotlb(&cvq->vring)) + vringh_notify(&cvq->vring); + } +out: + kfree(wqent); +} + static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_ctrl_wq_ent *wqent; + + if (!is_index_valid(mvdev, idx)) + return; + + if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { + if (!mvdev->cvq.ready) + return; + + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); + if (!wqent) + return; + wqent->mvdev = mvdev; + INIT_WORK(&wqent->work, mlx5_cvq_kick_handler); + queue_work(mvdev->wq, &wqent->work); + return; + } + + mvq = &ndev->vqs[idx]; if (unlikely(!mvq->ready)) return; @@ -1362,8 +1647,19 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_ { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_vdpa_virtqueue *mvq; + + if (!is_index_valid(mvdev, idx)) + return -EINVAL; + if (is_ctrl_vq_idx(mvdev, idx)) { + mvdev->cvq.desc_addr = desc_area; + mvdev->cvq.device_addr = device_area; + mvdev->cvq.driver_addr = driver_area; + return 0; + } + + mvq = &ndev->vqs[idx]; mvq->desc_addr = desc_area; mvq->device_addr = device_area; mvq->driver_addr = driver_area; @@ -1376,6 +1672,9 @@ static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + return; + mvq = &ndev->vqs[idx]; mvq->num_ent = num; } @@ -1384,17 +1683,46 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx]; - vq->event_cb = *cb; + ndev->event_cbs[idx] = *cb; +} + +static void mlx5_cvq_notify(struct vringh *vring) +{ + struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); + + if (!cvq->event_cb.callback) + return; + + cvq->event_cb.callback(cvq->event_cb.private); +} + +static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + + cvq->ready = ready; + if (!ready) + return; + + cvq->vring.notify = mlx5_cvq_notify; } static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_vdpa_virtqueue *mvq; + + if (!is_index_valid(mvdev, idx)) + return; + + if (is_ctrl_vq_idx(mvdev, idx)) { + set_cvq_ready(mvdev, ready); + return; + } + mvq = &ndev->vqs[idx]; if (!ready) suspend_vq(ndev, mvq); @@ -1405,9 +1733,14 @@ static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; - return mvq->ready; + if (!is_index_valid(mvdev, idx)) + return false; + + if (is_ctrl_vq_idx(mvdev, idx)) + return mvdev->cvq.ready; + + return ndev->vqs[idx].ready; } static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, @@ -1415,8 +1748,17 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_vdpa_virtqueue *mvq; + if (!is_index_valid(mvdev, idx)) + return -EINVAL; + + if (is_ctrl_vq_idx(mvdev, idx)) { + mvdev->cvq.vring.last_avail_idx = state->split.avail_index; + return 0; + } + + mvq = &ndev->vqs[idx]; if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { mlx5_vdpa_warn(mvdev, "can't modify available index\n"); return -EINVAL; @@ -1431,10 +1773,19 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_vdpa_virtqueue *mvq; struct mlx5_virtq_attr attr; int err; + if (!is_index_valid(mvdev, idx)) + return -EINVAL; + + if (is_ctrl_vq_idx(mvdev, idx)) { + state->split.avail_index = mvdev->cvq.vring.last_avail_idx; + return 0; + } + + mvq = &ndev->vqs[idx]; /* If the virtq object was destroyed, use the value saved at * the last minute of suspend_vq. This caters for userspace * that cares about emulating the index after vq is stopped. @@ -1491,10 +1842,14 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) u16 dev_features; dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask); - ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features); + ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features); if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0)) ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); + print_features(mvdev, ndev->mvdev.mlx_features, false); return ndev->mvdev.mlx_features; } @@ -1507,17 +1862,29 @@ static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features) return 0; } -static int setup_virtqueues(struct mlx5_vdpa_net *ndev) +static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i; - for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) { + for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) { err = setup_vq(ndev, &ndev->vqs[i]); if (err) goto err_vq; } + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + if (err) + goto err_vq; + } + return 0; err_vq: @@ -1541,16 +1908,22 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) } } -/* TODO: cross-endian support */ -static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) -{ - return virtio_legacy_is_little_endian() || - (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1)); -} - -static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) +static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) { - return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val); + if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { + if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { + /* MQ supported. CVQ index is right above the last data virtqueue's */ + mvdev->max_idx = mvdev->max_vqs; + } else { + /* Only CVQ supportted. data virtqueues occupy indices 0 and 1. + * CVQ gets index 2 + */ + mvdev->max_idx = 2; + } + } else { + /* Two data virtqueues only: one for rx and one for tx */ + mvdev->max_idx = 1; + } } static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) @@ -1568,6 +1941,7 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu); ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + update_cvq_info(mvdev); return err; } @@ -1605,15 +1979,14 @@ static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) { struct mlx5_vq_restore_info *ri = &mvq->ri; - struct mlx5_virtq_attr attr; + struct mlx5_virtq_attr attr = {}; int err; - if (!mvq->initialized) - return 0; - - err = query_virtqueue(ndev, mvq, &attr); - if (err) - return err; + if (mvq->initialized) { + err = query_virtqueue(ndev, mvq, &attr); + if (err) + return err; + } ri->avail_index = attr.available_index; ri->used_index = attr.used_index; @@ -1622,7 +1995,6 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu ri->desc_addr = mvq->desc_addr; ri->device_addr = mvq->device_addr; ri->driver_addr = mvq->driver_addr; - ri->cb = mvq->event_cb; ri->restore = true; return 0; } @@ -1667,12 +2039,12 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) mvq->desc_addr = ri->desc_addr; mvq->device_addr = ri->device_addr; mvq->driver_addr = ri->driver_addr; - mvq->event_cb = ri->cb; } } -static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb) +static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) { + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; suspend_vqs(ndev); @@ -1681,58 +2053,59 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb * goto err_mr; teardown_driver(ndev); - mlx5_vdpa_destroy_mr(&ndev->mvdev); - err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb); + mlx5_vdpa_destroy_mr(mvdev); + err = mlx5_vdpa_create_mr(mvdev, iotlb); if (err) goto err_mr; - if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) return 0; restore_channels_info(ndev); - err = setup_driver(ndev); + err = setup_driver(mvdev); if (err) goto err_setup; return 0; err_setup: - mlx5_vdpa_destroy_mr(&ndev->mvdev); + mlx5_vdpa_destroy_mr(mvdev); err_mr: return err; } -static int setup_driver(struct mlx5_vdpa_net *ndev) +static int setup_driver(struct mlx5_vdpa_dev *mvdev) { + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; mutex_lock(&ndev->reslock); if (ndev->setup) { - mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n"); + mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); err = 0; goto out; } - err = setup_virtqueues(ndev); + err = setup_virtqueues(mvdev); if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n"); + mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); goto out; } err = create_rqt(ndev); if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n"); + mlx5_vdpa_warn(mvdev, "create_rqt\n"); goto err_rqt; } err = create_tir(ndev); if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n"); + mlx5_vdpa_warn(mvdev, "create_tir\n"); goto err_tir; } err = add_fwd_to_tir(ndev); if (err) { - mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n"); + mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n"); goto err_fwd; } ndev->setup = true; @@ -1781,24 +2154,10 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) int err; print_status(mvdev, status, true); - if (!status) { - mlx5_vdpa_info(mvdev, "performing device reset\n"); - teardown_driver(ndev); - clear_vqs_ready(ndev); - mlx5_vdpa_destroy_mr(&ndev->mvdev); - ndev->mvdev.status = 0; - ndev->mvdev.mlx_features = 0; - ++mvdev->generation; - if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL)) - mlx5_vdpa_warn(mvdev, "create MR failed\n"); - } - return; - } if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { - err = setup_driver(ndev); + err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); goto err_setup; @@ -1817,6 +2176,29 @@ err_setup: ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; } +static int mlx5_vdpa_reset(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + print_status(mvdev, 0, true); + mlx5_vdpa_info(mvdev, "performing device reset\n"); + teardown_driver(ndev); + clear_vqs_ready(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status = 0; + ndev->mvdev.mlx_features = 0; + memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); + ndev->mvdev.actual_features = 0; + ++mvdev->generation; + if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { + if (mlx5_vdpa_create_mr(mvdev, NULL)) + mlx5_vdpa_warn(mvdev, "create MR failed\n"); + } + + return 0; +} + static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) { return sizeof(struct virtio_net_config); @@ -1848,7 +2230,6 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); - struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); bool change_map; int err; @@ -1859,7 +2240,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb } if (change_map) - return mlx5_vdpa_change_map(ndev, iotlb); + return mlx5_vdpa_change_map(mvdev, iotlb); return 0; } @@ -1889,6 +2270,9 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device struct mlx5_vdpa_net *ndev; phys_addr_t addr; + if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) + return ret; + /* If SF BAR size is smaller than PAGE_SIZE, do not use direct * notification to avoid the risk of mapping pages that contain BAR of more * than one SF @@ -1928,6 +2312,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = { .get_vendor_id = mlx5_vdpa_get_vendor_id, .get_status = mlx5_vdpa_get_status, .set_status = mlx5_vdpa_set_status, + .reset = mlx5_vdpa_reset, .get_config_size = mlx5_vdpa_get_config_size, .get_config = mlx5_vdpa_get_config, .set_config = mlx5_vdpa_set_config, @@ -2040,7 +2425,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - name); + name, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); @@ -2063,8 +2448,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) err = mlx5_mpfs_add_mac(pfmdev, config->mac); if (err) goto err_mtu; + + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC); } + config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs)); mvdev->vdev.dma_dev = &mdev->pdev->dev; err = mlx5_vdpa_alloc_resources(&ndev->mvdev); if (err) @@ -2080,8 +2468,15 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mr; + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq"); + if (!mvdev->wq) { + err = -ENOMEM; + goto err_res2; + } + + ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs); mvdev->vdev.mdev = &mgtdev->mgtdev; - err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs)); + err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1); if (err) goto err_reg; @@ -2089,6 +2484,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) return 0; err_reg: + destroy_workqueue(mvdev->wq); +err_res2: free_resources(ndev); err_mr: mlx5_vdpa_destroy_mr(mvdev); @@ -2106,7 +2503,9 @@ err_mtu: static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); + struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); + destroy_workqueue(mvdev->wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 3fc4525fc05c..1dc121a07a93 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -69,6 +69,7 @@ static void vdpa_release_dev(struct device *d) * @config: the bus operations that is supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. + * @use_va: indicate whether virtual address must be used by this device * * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. @@ -78,7 +79,8 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - size_t size, const char *name) + size_t size, const char *name, + bool use_va) { struct vdpa_device *vdev; int err = -EINVAL; @@ -89,6 +91,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (!!config->dma_map != !!config->dma_unmap) goto err; + /* It should only work for the device that use on-chip IOMMU */ + if (use_va && !(config->dma_map || config->set_map)) + goto err; + err = -ENOMEM; vdev = kzalloc(size, GFP_KERNEL); if (!vdev) @@ -104,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->index = err; vdev->config = config; vdev->features_valid = false; + vdev->use_va = use_va; if (name) err = dev_set_name(&vdev->dev, "%s", name); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index c621cf7feec0..5f484fff8dbe 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -92,7 +92,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim, vq->vring.notify = NULL; } -static void vdpasim_reset(struct vdpasim *vdpasim) +static void vdpasim_do_reset(struct vdpasim *vdpasim) { int i; @@ -137,7 +137,8 @@ static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr, int ret; /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */ - iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true); + iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova), + ULONG_MAX - 1, true); if (!iova) return DMA_MAPPING_ERROR; @@ -250,7 +251,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, - dev_attr->name); + dev_attr->name, false); if (IS_ERR(vdpasim)) { ret = PTR_ERR(vdpasim); goto err_alloc; @@ -459,11 +460,21 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) spin_lock(&vdpasim->lock); vdpasim->status = status; - if (status == 0) - vdpasim_reset(vdpasim); spin_unlock(&vdpasim->lock); } +static int vdpasim_reset(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + spin_lock(&vdpasim->lock); + vdpasim->status = 0; + vdpasim_do_reset(vdpasim); + spin_unlock(&vdpasim->lock); + + return 0; +} + static size_t vdpasim_get_config_size(struct vdpa_device *vdpa) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); @@ -544,14 +555,14 @@ err: } static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, - u64 pa, u32 perm) + u64 pa, u32 perm, void *opaque) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); int ret; spin_lock(&vdpasim->iommu_lock); - ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, - perm); + ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1, + pa, perm, opaque); spin_unlock(&vdpasim->iommu_lock); return ret; @@ -607,6 +618,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, + .reset = vdpasim_reset, .get_config_size = vdpasim_get_config_size, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, @@ -635,6 +647,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .get_vendor_id = vdpasim_get_vendor_id, .get_status = vdpasim_get_status, .set_status = vdpasim_set_status, + .reset = vdpasim_reset, .get_config_size = vdpasim_get_config_size, .get_config = vdpasim_get_config, .set_config = vdpasim_set_config, diff --git a/drivers/vdpa/vdpa_user/Makefile b/drivers/vdpa/vdpa_user/Makefile new file mode 100644 index 000000000000..260e0b26af99 --- /dev/null +++ b/drivers/vdpa/vdpa_user/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +vduse-y := vduse_dev.o iova_domain.o + +obj-$(CONFIG_VDPA_USER) += vduse.o diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c new file mode 100644 index 000000000000..1daae2608860 --- /dev/null +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MMU-based software IOTLB. + * + * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: Xie Yongji <xieyongji@bytedance.com> + * + */ + +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/anon_inodes.h> +#include <linux/highmem.h> +#include <linux/vmalloc.h> +#include <linux/vdpa.h> + +#include "iova_domain.h" + +static int vduse_iotlb_add_range(struct vduse_iova_domain *domain, + u64 start, u64 last, + u64 addr, unsigned int perm, + struct file *file, u64 offset) +{ + struct vdpa_map_file *map_file; + int ret; + + map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC); + if (!map_file) + return -ENOMEM; + + map_file->file = get_file(file); + map_file->offset = offset; + + ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last, + addr, perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + return ret; + } + return 0; +} + +static void vduse_iotlb_del_range(struct vduse_iova_domain *domain, + u64 start, u64 last) +{ + struct vdpa_map_file *map_file; + struct vhost_iotlb_map *map; + + while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + vhost_iotlb_map_free(domain->iotlb, map); + } +} + +int vduse_domain_set_map(struct vduse_iova_domain *domain, + struct vhost_iotlb *iotlb) +{ + struct vdpa_map_file *map_file; + struct vhost_iotlb_map *map; + u64 start = 0ULL, last = ULLONG_MAX; + int ret; + + spin_lock(&domain->iotlb_lock); + vduse_iotlb_del_range(domain, start, last); + + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + map_file = (struct vdpa_map_file *)map->opaque; + ret = vduse_iotlb_add_range(domain, map->start, map->last, + map->addr, map->perm, + map_file->file, + map_file->offset); + if (ret) + goto err; + } + spin_unlock(&domain->iotlb_lock); + + return 0; +err: + vduse_iotlb_del_range(domain, start, last); + spin_unlock(&domain->iotlb_lock); + return ret; +} + +void vduse_domain_clear_map(struct vduse_iova_domain *domain, + struct vhost_iotlb *iotlb) +{ + struct vhost_iotlb_map *map; + u64 start = 0ULL, last = ULLONG_MAX; + + spin_lock(&domain->iotlb_lock); + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + vduse_iotlb_del_range(domain, map->start, map->last); + } + spin_unlock(&domain->iotlb_lock); +} + +static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, + u64 iova, u64 size, u64 paddr) +{ + struct vduse_bounce_map *map; + u64 last = iova + size - 1; + + while (iova <= last) { + map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + if (!map->bounce_page) { + map->bounce_page = alloc_page(GFP_ATOMIC); + if (!map->bounce_page) + return -ENOMEM; + } + map->orig_phys = paddr; + paddr += PAGE_SIZE; + iova += PAGE_SIZE; + } + return 0; +} + +static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain, + u64 iova, u64 size) +{ + struct vduse_bounce_map *map; + u64 last = iova + size - 1; + + while (iova <= last) { + map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + map->orig_phys = INVALID_PHYS_ADDR; + iova += PAGE_SIZE; + } +} + +static void do_bounce(phys_addr_t orig, void *addr, size_t size, + enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(orig); + unsigned int offset = offset_in_page(orig); + char *buffer; + unsigned int sz = 0; + + while (size) { + sz = min_t(size_t, PAGE_SIZE - offset, size); + + buffer = kmap_atomic(pfn_to_page(pfn)); + if (dir == DMA_TO_DEVICE) + memcpy(addr, buffer + offset, sz); + else + memcpy(buffer + offset, addr, sz); + kunmap_atomic(buffer); + + size -= sz; + pfn++; + addr += sz; + offset = 0; + } +} + +static void vduse_domain_bounce(struct vduse_iova_domain *domain, + dma_addr_t iova, size_t size, + enum dma_data_direction dir) +{ + struct vduse_bounce_map *map; + unsigned int offset; + void *addr; + size_t sz; + + if (iova >= domain->bounce_size) + return; + + while (size) { + map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + offset = offset_in_page(iova); + sz = min_t(size_t, PAGE_SIZE - offset, size); + + if (WARN_ON(!map->bounce_page || + map->orig_phys == INVALID_PHYS_ADDR)) + return; + + addr = page_address(map->bounce_page) + offset; + do_bounce(map->orig_phys + offset, addr, sz, dir); + size -= sz; + iova += sz; + } +} + +static struct page * +vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova) +{ + u64 start = iova & PAGE_MASK; + u64 last = start + PAGE_SIZE - 1; + struct vhost_iotlb_map *map; + struct page *page = NULL; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, start, last); + if (!map) + goto out; + + page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT); + get_page(page); +out: + spin_unlock(&domain->iotlb_lock); + + return page; +} + +static struct page * +vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) +{ + struct vduse_bounce_map *map; + struct page *page = NULL; + + spin_lock(&domain->iotlb_lock); + map = &domain->bounce_maps[iova >> PAGE_SHIFT]; + if (!map->bounce_page) + goto out; + + page = map->bounce_page; + get_page(page); +out: + spin_unlock(&domain->iotlb_lock); + + return page; +} + +static void +vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain) +{ + struct vduse_bounce_map *map; + unsigned long pfn, bounce_pfns; + + bounce_pfns = domain->bounce_size >> PAGE_SHIFT; + + for (pfn = 0; pfn < bounce_pfns; pfn++) { + map = &domain->bounce_maps[pfn]; + if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR)) + continue; + + if (!map->bounce_page) + continue; + + __free_page(map->bounce_page); + map->bounce_page = NULL; + } +} + +void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain) +{ + if (!domain->bounce_map) + return; + + spin_lock(&domain->iotlb_lock); + if (!domain->bounce_map) + goto unlock; + + vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1); + domain->bounce_map = 0; +unlock: + spin_unlock(&domain->iotlb_lock); +} + +static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain) +{ + int ret = 0; + + if (domain->bounce_map) + return 0; + + spin_lock(&domain->iotlb_lock); + if (domain->bounce_map) + goto unlock; + + ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1, + 0, VHOST_MAP_RW, domain->file, 0); + if (ret) + goto unlock; + + domain->bounce_map = 1; +unlock: + spin_unlock(&domain->iotlb_lock); + return ret; +} + +static dma_addr_t +vduse_domain_alloc_iova(struct iova_domain *iovad, + unsigned long size, unsigned long limit) +{ + unsigned long shift = iova_shift(iovad); + unsigned long iova_len = iova_align(iovad, size) >> shift; + unsigned long iova_pfn; + + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true); + + return iova_pfn << shift; +} + +static void vduse_domain_free_iova(struct iova_domain *iovad, + dma_addr_t iova, size_t size) +{ + unsigned long shift = iova_shift(iovad); + unsigned long iova_len = iova_align(iovad, size) >> shift; + + free_iova_fast(iovad, iova >> shift, iova_len); +} + +dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, + struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct iova_domain *iovad = &domain->stream_iovad; + unsigned long limit = domain->bounce_size - 1; + phys_addr_t pa = page_to_phys(page) + offset; + dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); + + if (!iova) + return DMA_MAPPING_ERROR; + + if (vduse_domain_init_bounce_map(domain)) + goto err; + + if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) + goto err; + + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); + + return iova; +err: + vduse_domain_free_iova(iovad, iova, size); + return DMA_MAPPING_ERROR; +} + +void vduse_domain_unmap_page(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct iova_domain *iovad = &domain->stream_iovad; + + if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) + vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); + + vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); + vduse_domain_free_iova(iovad, dma_addr, size); +} + +void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, + size_t size, dma_addr_t *dma_addr, + gfp_t flag, unsigned long attrs) +{ + struct iova_domain *iovad = &domain->consistent_iovad; + unsigned long limit = domain->iova_limit; + dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); + void *orig = alloc_pages_exact(size, flag); + + if (!iova || !orig) + goto err; + + spin_lock(&domain->iotlb_lock); + if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1, + virt_to_phys(orig), VHOST_MAP_RW, + domain->file, (u64)iova)) { + spin_unlock(&domain->iotlb_lock); + goto err; + } + spin_unlock(&domain->iotlb_lock); + + *dma_addr = iova; + + return orig; +err: + *dma_addr = DMA_MAPPING_ERROR; + if (orig) + free_pages_exact(orig, size); + if (iova) + vduse_domain_free_iova(iovad, iova, size); + + return NULL; +} + +void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct iova_domain *iovad = &domain->consistent_iovad; + struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; + phys_addr_t pa; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr, + (u64)dma_addr + size - 1); + if (WARN_ON(!map)) { + spin_unlock(&domain->iotlb_lock); + return; + } + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + pa = map->addr; + vhost_iotlb_map_free(domain->iotlb, map); + spin_unlock(&domain->iotlb_lock); + + vduse_domain_free_iova(iovad, dma_addr, size); + free_pages_exact(phys_to_virt(pa), size); +} + +static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf) +{ + struct vduse_iova_domain *domain = vmf->vma->vm_private_data; + unsigned long iova = vmf->pgoff << PAGE_SHIFT; + struct page *page; + + if (!domain) + return VM_FAULT_SIGBUS; + + if (iova < domain->bounce_size) + page = vduse_domain_get_bounce_page(domain, iova); + else + page = vduse_domain_get_coherent_page(domain, iova); + + if (!page) + return VM_FAULT_SIGBUS; + + vmf->page = page; + + return 0; +} + +static const struct vm_operations_struct vduse_domain_mmap_ops = { + .fault = vduse_domain_mmap_fault, +}; + +static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct vduse_iova_domain *domain = file->private_data; + + vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND; + vma->vm_private_data = domain; + vma->vm_ops = &vduse_domain_mmap_ops; + + return 0; +} + +static int vduse_domain_release(struct inode *inode, struct file *file) +{ + struct vduse_iova_domain *domain = file->private_data; + + spin_lock(&domain->iotlb_lock); + vduse_iotlb_del_range(domain, 0, ULLONG_MAX); + vduse_domain_free_bounce_pages(domain); + spin_unlock(&domain->iotlb_lock); + put_iova_domain(&domain->stream_iovad); + put_iova_domain(&domain->consistent_iovad); + vhost_iotlb_free(domain->iotlb); + vfree(domain->bounce_maps); + kfree(domain); + + return 0; +} + +static const struct file_operations vduse_domain_fops = { + .owner = THIS_MODULE, + .mmap = vduse_domain_mmap, + .release = vduse_domain_release, +}; + +void vduse_domain_destroy(struct vduse_iova_domain *domain) +{ + fput(domain->file); +} + +struct vduse_iova_domain * +vduse_domain_create(unsigned long iova_limit, size_t bounce_size) +{ + struct vduse_iova_domain *domain; + struct file *file; + struct vduse_bounce_map *map; + unsigned long pfn, bounce_pfns; + + bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; + if (iova_limit <= bounce_size) + return NULL; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + domain->iotlb = vhost_iotlb_alloc(0, 0); + if (!domain->iotlb) + goto err_iotlb; + + domain->iova_limit = iova_limit; + domain->bounce_size = PAGE_ALIGN(bounce_size); + domain->bounce_maps = vzalloc(bounce_pfns * + sizeof(struct vduse_bounce_map)); + if (!domain->bounce_maps) + goto err_map; + + for (pfn = 0; pfn < bounce_pfns; pfn++) { + map = &domain->bounce_maps[pfn]; + map->orig_phys = INVALID_PHYS_ADDR; + } + file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops, + domain, O_RDWR); + if (IS_ERR(file)) + goto err_file; + + domain->file = file; + spin_lock_init(&domain->iotlb_lock); + init_iova_domain(&domain->stream_iovad, + PAGE_SIZE, IOVA_START_PFN); + init_iova_domain(&domain->consistent_iovad, + PAGE_SIZE, bounce_pfns); + + return domain; +err_file: + vfree(domain->bounce_maps); +err_map: + vhost_iotlb_free(domain->iotlb); +err_iotlb: + kfree(domain); + return NULL; +} + +int vduse_domain_init(void) +{ + return iova_cache_get(); +} + +void vduse_domain_exit(void) +{ + iova_cache_put(); +} diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h new file mode 100644 index 000000000000..2722d9b8e21a --- /dev/null +++ b/drivers/vdpa/vdpa_user/iova_domain.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * MMU-based software IOTLB. + * + * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: Xie Yongji <xieyongji@bytedance.com> + * + */ + +#ifndef _VDUSE_IOVA_DOMAIN_H +#define _VDUSE_IOVA_DOMAIN_H + +#include <linux/iova.h> +#include <linux/dma-mapping.h> +#include <linux/vhost_iotlb.h> + +#define IOVA_START_PFN 1 + +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) + +struct vduse_bounce_map { + struct page *bounce_page; + u64 orig_phys; +}; + +struct vduse_iova_domain { + struct iova_domain stream_iovad; + struct iova_domain consistent_iovad; + struct vduse_bounce_map *bounce_maps; + size_t bounce_size; + unsigned long iova_limit; + int bounce_map; + struct vhost_iotlb *iotlb; + spinlock_t iotlb_lock; + struct file *file; +}; + +int vduse_domain_set_map(struct vduse_iova_domain *domain, + struct vhost_iotlb *iotlb); + +void vduse_domain_clear_map(struct vduse_iova_domain *domain, + struct vhost_iotlb *iotlb); + +dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, + struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + +void vduse_domain_unmap_page(struct vduse_iova_domain *domain, + dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs); + +void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, + size_t size, dma_addr_t *dma_addr, + gfp_t flag, unsigned long attrs); + +void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs); + +void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain); + +void vduse_domain_destroy(struct vduse_iova_domain *domain); + +struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit, + size_t bounce_size); + +int vduse_domain_init(void); + +void vduse_domain_exit(void); + +#endif /* _VDUSE_IOVA_DOMAIN_H */ diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c new file mode 100644 index 000000000000..29a38ecba19e --- /dev/null +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -0,0 +1,1641 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDUSE: vDPA Device in Userspace + * + * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: Xie Yongji <xieyongji@bytedance.com> + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/eventfd.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/dma-map-ops.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/uio.h> +#include <linux/vdpa.h> +#include <linux/nospec.h> +#include <uapi/linux/vduse.h> +#include <uapi/linux/vdpa.h> +#include <uapi/linux/virtio_config.h> +#include <uapi/linux/virtio_ids.h> +#include <uapi/linux/virtio_blk.h> +#include <linux/mod_devicetable.h> + +#include "iova_domain.h" + +#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>" +#define DRV_DESC "vDPA Device in Userspace" +#define DRV_LICENSE "GPL v2" + +#define VDUSE_DEV_MAX (1U << MINORBITS) +#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) +#define VDUSE_IOVA_SIZE (128 * 1024 * 1024) +#define VDUSE_MSG_DEFAULT_TIMEOUT 30 + +struct vduse_virtqueue { + u16 index; + u16 num_max; + u32 num; + u64 desc_addr; + u64 driver_addr; + u64 device_addr; + struct vdpa_vq_state state; + bool ready; + bool kicked; + spinlock_t kick_lock; + spinlock_t irq_lock; + struct eventfd_ctx *kickfd; + struct vdpa_callback cb; + struct work_struct inject; + struct work_struct kick; +}; + +struct vduse_dev; + +struct vduse_vdpa { + struct vdpa_device vdpa; + struct vduse_dev *dev; +}; + +struct vduse_dev { + struct vduse_vdpa *vdev; + struct device *dev; + struct vduse_virtqueue *vqs; + struct vduse_iova_domain *domain; + char *name; + struct mutex lock; + spinlock_t msg_lock; + u64 msg_unique; + u32 msg_timeout; + wait_queue_head_t waitq; + struct list_head send_list; + struct list_head recv_list; + struct vdpa_callback config_cb; + struct work_struct inject; + spinlock_t irq_lock; + int minor; + bool broken; + bool connected; + u64 api_version; + u64 device_features; + u64 driver_features; + u32 device_id; + u32 vendor_id; + u32 generation; + u32 config_size; + void *config; + u8 status; + u32 vq_num; + u32 vq_align; +}; + +struct vduse_dev_msg { + struct vduse_dev_request req; + struct vduse_dev_response resp; + struct list_head list; + wait_queue_head_t waitq; + bool completed; +}; + +struct vduse_control { + u64 api_version; +}; + +static DEFINE_MUTEX(vduse_lock); +static DEFINE_IDR(vduse_idr); + +static dev_t vduse_major; +static struct class *vduse_class; +static struct cdev vduse_ctrl_cdev; +static struct cdev vduse_cdev; +static struct workqueue_struct *vduse_irq_wq; + +static u32 allowed_device_id[] = { + VIRTIO_ID_BLOCK, +}; + +static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa) +{ + struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa); + + return vdev->dev; +} + +static inline struct vduse_dev *dev_to_vduse(struct device *dev) +{ + struct vdpa_device *vdpa = dev_to_vdpa(dev); + + return vdpa_to_vduse(vdpa); +} + +static struct vduse_dev_msg *vduse_find_msg(struct list_head *head, + uint32_t request_id) +{ + struct vduse_dev_msg *msg; + + list_for_each_entry(msg, head, list) { + if (msg->req.request_id == request_id) { + list_del(&msg->list); + return msg; + } + } + + return NULL; +} + +static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head) +{ + struct vduse_dev_msg *msg = NULL; + + if (!list_empty(head)) { + msg = list_first_entry(head, struct vduse_dev_msg, list); + list_del(&msg->list); + } + + return msg; +} + +static void vduse_enqueue_msg(struct list_head *head, + struct vduse_dev_msg *msg) +{ + list_add_tail(&msg->list, head); +} + +static void vduse_dev_broken(struct vduse_dev *dev) +{ + struct vduse_dev_msg *msg, *tmp; + + if (unlikely(dev->broken)) + return; + + list_splice_init(&dev->recv_list, &dev->send_list); + list_for_each_entry_safe(msg, tmp, &dev->send_list, list) { + list_del(&msg->list); + msg->completed = 1; + msg->resp.result = VDUSE_REQ_RESULT_FAILED; + wake_up(&msg->waitq); + } + dev->broken = true; + wake_up(&dev->waitq); +} + +static int vduse_dev_msg_sync(struct vduse_dev *dev, + struct vduse_dev_msg *msg) +{ + int ret; + + if (unlikely(dev->broken)) + return -EIO; + + init_waitqueue_head(&msg->waitq); + spin_lock(&dev->msg_lock); + if (unlikely(dev->broken)) { + spin_unlock(&dev->msg_lock); + return -EIO; + } + msg->req.request_id = dev->msg_unique++; + vduse_enqueue_msg(&dev->send_list, msg); + wake_up(&dev->waitq); + spin_unlock(&dev->msg_lock); + if (dev->msg_timeout) + ret = wait_event_killable_timeout(msg->waitq, msg->completed, + (long)dev->msg_timeout * HZ); + else + ret = wait_event_killable(msg->waitq, msg->completed); + + spin_lock(&dev->msg_lock); + if (!msg->completed) { + list_del(&msg->list); + msg->resp.result = VDUSE_REQ_RESULT_FAILED; + /* Mark the device as malfunction when there is a timeout */ + if (!ret) + vduse_dev_broken(dev); + } + ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO; + spin_unlock(&dev->msg_lock); + + return ret; +} + +static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev, + struct vduse_virtqueue *vq, + struct vdpa_vq_state_packed *packed) +{ + struct vduse_dev_msg msg = { 0 }; + int ret; + + msg.req.type = VDUSE_GET_VQ_STATE; + msg.req.vq_state.index = vq->index; + + ret = vduse_dev_msg_sync(dev, &msg); + if (ret) + return ret; + + packed->last_avail_counter = + msg.resp.vq_state.packed.last_avail_counter & 0x0001; + packed->last_avail_idx = + msg.resp.vq_state.packed.last_avail_idx & 0x7FFF; + packed->last_used_counter = + msg.resp.vq_state.packed.last_used_counter & 0x0001; + packed->last_used_idx = + msg.resp.vq_state.packed.last_used_idx & 0x7FFF; + + return 0; +} + +static int vduse_dev_get_vq_state_split(struct vduse_dev *dev, + struct vduse_virtqueue *vq, + struct vdpa_vq_state_split *split) +{ + struct vduse_dev_msg msg = { 0 }; + int ret; + + msg.req.type = VDUSE_GET_VQ_STATE; + msg.req.vq_state.index = vq->index; + + ret = vduse_dev_msg_sync(dev, &msg); + if (ret) + return ret; + + split->avail_index = msg.resp.vq_state.split.avail_index; + + return 0; +} + +static int vduse_dev_set_status(struct vduse_dev *dev, u8 status) +{ + struct vduse_dev_msg msg = { 0 }; + + msg.req.type = VDUSE_SET_STATUS; + msg.req.s.status = status; + + return vduse_dev_msg_sync(dev, &msg); +} + +static int vduse_dev_update_iotlb(struct vduse_dev *dev, + u64 start, u64 last) +{ + struct vduse_dev_msg msg = { 0 }; + + if (last < start) + return -EINVAL; + + msg.req.type = VDUSE_UPDATE_IOTLB; + msg.req.iova.start = start; + msg.req.iova.last = last; + + return vduse_dev_msg_sync(dev, &msg); +} + +static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vduse_dev *dev = file->private_data; + struct vduse_dev_msg *msg; + int size = sizeof(struct vduse_dev_request); + ssize_t ret; + + if (iov_iter_count(to) < size) + return -EINVAL; + + spin_lock(&dev->msg_lock); + while (1) { + msg = vduse_dequeue_msg(&dev->send_list); + if (msg) + break; + + ret = -EAGAIN; + if (file->f_flags & O_NONBLOCK) + goto unlock; + + spin_unlock(&dev->msg_lock); + ret = wait_event_interruptible_exclusive(dev->waitq, + !list_empty(&dev->send_list)); + if (ret) + return ret; + + spin_lock(&dev->msg_lock); + } + spin_unlock(&dev->msg_lock); + ret = copy_to_iter(&msg->req, size, to); + spin_lock(&dev->msg_lock); + if (ret != size) { + ret = -EFAULT; + vduse_enqueue_msg(&dev->send_list, msg); + goto unlock; + } + vduse_enqueue_msg(&dev->recv_list, msg); +unlock: + spin_unlock(&dev->msg_lock); + + return ret; +} + +static bool is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return false; + } + return true; +} + +static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vduse_dev *dev = file->private_data; + struct vduse_dev_response resp; + struct vduse_dev_msg *msg; + size_t ret; + + ret = copy_from_iter(&resp, sizeof(resp), from); + if (ret != sizeof(resp)) + return -EINVAL; + + if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved))) + return -EINVAL; + + spin_lock(&dev->msg_lock); + msg = vduse_find_msg(&dev->recv_list, resp.request_id); + if (!msg) { + ret = -ENOENT; + goto unlock; + } + + memcpy(&msg->resp, &resp, sizeof(resp)); + msg->completed = 1; + wake_up(&msg->waitq); +unlock: + spin_unlock(&dev->msg_lock); + + return ret; +} + +static __poll_t vduse_dev_poll(struct file *file, poll_table *wait) +{ + struct vduse_dev *dev = file->private_data; + __poll_t mask = 0; + + poll_wait(file, &dev->waitq, wait); + + spin_lock(&dev->msg_lock); + + if (unlikely(dev->broken)) + mask |= EPOLLERR; + if (!list_empty(&dev->send_list)) + mask |= EPOLLIN | EPOLLRDNORM; + if (!list_empty(&dev->recv_list)) + mask |= EPOLLOUT | EPOLLWRNORM; + + spin_unlock(&dev->msg_lock); + + return mask; +} + +static void vduse_dev_reset(struct vduse_dev *dev) +{ + int i; + struct vduse_iova_domain *domain = dev->domain; + + /* The coherent mappings are handled in vduse_dev_free_coherent() */ + if (domain->bounce_map) + vduse_domain_reset_bounce_map(domain); + + dev->status = 0; + dev->driver_features = 0; + dev->generation++; + spin_lock(&dev->irq_lock); + dev->config_cb.callback = NULL; + dev->config_cb.private = NULL; + spin_unlock(&dev->irq_lock); + flush_work(&dev->inject); + + for (i = 0; i < dev->vq_num; i++) { + struct vduse_virtqueue *vq = &dev->vqs[i]; + + vq->ready = false; + vq->desc_addr = 0; + vq->driver_addr = 0; + vq->device_addr = 0; + vq->num = 0; + memset(&vq->state, 0, sizeof(vq->state)); + + spin_lock(&vq->kick_lock); + vq->kicked = false; + if (vq->kickfd) + eventfd_ctx_put(vq->kickfd); + vq->kickfd = NULL; + spin_unlock(&vq->kick_lock); + + spin_lock(&vq->irq_lock); + vq->cb.callback = NULL; + vq->cb.private = NULL; + spin_unlock(&vq->irq_lock); + flush_work(&vq->inject); + flush_work(&vq->kick); + } +} + +static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + vq->desc_addr = desc_area; + vq->driver_addr = driver_area; + vq->device_addr = device_area; + + return 0; +} + +static void vduse_vq_kick(struct vduse_virtqueue *vq) +{ + spin_lock(&vq->kick_lock); + if (!vq->ready) + goto unlock; + + if (vq->kickfd) + eventfd_signal(vq->kickfd, 1); + else + vq->kicked = true; +unlock: + spin_unlock(&vq->kick_lock); +} + +static void vduse_vq_kick_work(struct work_struct *work) +{ + struct vduse_virtqueue *vq = container_of(work, + struct vduse_virtqueue, kick); + + vduse_vq_kick(vq); +} + +static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + if (!eventfd_signal_allowed()) { + schedule_work(&vq->kick); + return; + } + vduse_vq_kick(vq); +} + +static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, + struct vdpa_callback *cb) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + spin_lock(&vq->irq_lock); + vq->cb.callback = cb->callback; + vq->cb.private = cb->private; + spin_unlock(&vq->irq_lock); +} + +static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + vq->num = num; +} + +static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, + u16 idx, bool ready) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + vq->ready = ready; +} + +static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + return vq->ready; +} + +static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, + const struct vdpa_vq_state *state) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { + vq->state.packed.last_avail_counter = + state->packed.last_avail_counter; + vq->state.packed.last_avail_idx = state->packed.last_avail_idx; + vq->state.packed.last_used_counter = + state->packed.last_used_counter; + vq->state.packed.last_used_idx = state->packed.last_used_idx; + } else + vq->state.split.avail_index = state->split.avail_index; + + return 0; +} + +static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct vdpa_vq_state *state) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + struct vduse_virtqueue *vq = &dev->vqs[idx]; + + if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) + return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); + + return vduse_dev_get_vq_state_split(dev, vq, &state->split); +} + +static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->vq_align; +} + +static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->device_features; +} + +static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + dev->driver_features = features; + return 0; +} + +static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + spin_lock(&dev->irq_lock); + dev->config_cb.callback = cb->callback; + dev->config_cb.private = cb->private; + spin_unlock(&dev->irq_lock); +} + +static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + u16 num_max = 0; + int i; + + for (i = 0; i < dev->vq_num; i++) + if (num_max < dev->vqs[i].num_max) + num_max = dev->vqs[i].num_max; + + return num_max; +} + +static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->device_id; +} + +static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->vendor_id; +} + +static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->status; +} + +static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + if (vduse_dev_set_status(dev, status)) + return; + + dev->status = status; +} + +static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->config_size; +} + +static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, + void *buf, unsigned int len) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + if (len > dev->config_size - offset) + return; + + memcpy(buf, dev->config + offset, len); +} + +static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset, + const void *buf, unsigned int len) +{ + /* Now we only support read-only configuration space */ +} + +static int vduse_vdpa_reset(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + if (vduse_dev_set_status(dev, 0)) + return -EIO; + + vduse_dev_reset(dev); + + return 0; +} + +static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + return dev->generation; +} + +static int vduse_vdpa_set_map(struct vdpa_device *vdpa, + struct vhost_iotlb *iotlb) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + int ret; + + ret = vduse_domain_set_map(dev->domain, iotlb); + if (ret) + return ret; + + ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX); + if (ret) { + vduse_domain_clear_map(dev->domain, iotlb); + return ret; + } + + return 0; +} + +static void vduse_vdpa_free(struct vdpa_device *vdpa) +{ + struct vduse_dev *dev = vdpa_to_vduse(vdpa); + + dev->vdev = NULL; +} + +static const struct vdpa_config_ops vduse_vdpa_config_ops = { + .set_vq_address = vduse_vdpa_set_vq_address, + .kick_vq = vduse_vdpa_kick_vq, + .set_vq_cb = vduse_vdpa_set_vq_cb, + .set_vq_num = vduse_vdpa_set_vq_num, + .set_vq_ready = vduse_vdpa_set_vq_ready, + .get_vq_ready = vduse_vdpa_get_vq_ready, + .set_vq_state = vduse_vdpa_set_vq_state, + .get_vq_state = vduse_vdpa_get_vq_state, + .get_vq_align = vduse_vdpa_get_vq_align, + .get_features = vduse_vdpa_get_features, + .set_features = vduse_vdpa_set_features, + .set_config_cb = vduse_vdpa_set_config_cb, + .get_vq_num_max = vduse_vdpa_get_vq_num_max, + .get_device_id = vduse_vdpa_get_device_id, + .get_vendor_id = vduse_vdpa_get_vendor_id, + .get_status = vduse_vdpa_get_status, + .set_status = vduse_vdpa_set_status, + .get_config_size = vduse_vdpa_get_config_size, + .get_config = vduse_vdpa_get_config, + .set_config = vduse_vdpa_set_config, + .get_generation = vduse_vdpa_get_generation, + .reset = vduse_vdpa_reset, + .set_map = vduse_vdpa_set_map, + .free = vduse_vdpa_free, +}; + +static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + return vduse_domain_map_page(domain, page, offset, size, dir, attrs); +} + +static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs); +} + +static void *vduse_dev_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag, + unsigned long attrs) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + unsigned long iova; + void *addr; + + *dma_addr = DMA_MAPPING_ERROR; + addr = vduse_domain_alloc_coherent(domain, size, + (dma_addr_t *)&iova, flag, attrs); + if (!addr) + return NULL; + + *dma_addr = (dma_addr_t)iova; + + return addr; +} + +static void vduse_dev_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs); +} + +static size_t vduse_dev_max_mapping_size(struct device *dev) +{ + struct vduse_dev *vdev = dev_to_vduse(dev); + struct vduse_iova_domain *domain = vdev->domain; + + return domain->bounce_size; +} + +static const struct dma_map_ops vduse_dev_dma_ops = { + .map_page = vduse_dev_map_page, + .unmap_page = vduse_dev_unmap_page, + .alloc = vduse_dev_alloc_coherent, + .free = vduse_dev_free_coherent, + .max_mapping_size = vduse_dev_max_mapping_size, +}; + +static unsigned int perm_to_file_flags(u8 perm) +{ + unsigned int flags = 0; + + switch (perm) { + case VDUSE_ACCESS_WO: + flags |= O_WRONLY; + break; + case VDUSE_ACCESS_RO: + flags |= O_RDONLY; + break; + case VDUSE_ACCESS_RW: + flags |= O_RDWR; + break; + default: + WARN(1, "invalidate vhost IOTLB permission\n"); + break; + } + + return flags; +} + +static int vduse_kickfd_setup(struct vduse_dev *dev, + struct vduse_vq_eventfd *eventfd) +{ + struct eventfd_ctx *ctx = NULL; + struct vduse_virtqueue *vq; + u32 index; + + if (eventfd->index >= dev->vq_num) + return -EINVAL; + + index = array_index_nospec(eventfd->index, dev->vq_num); + vq = &dev->vqs[index]; + if (eventfd->fd >= 0) { + ctx = eventfd_ctx_fdget(eventfd->fd); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN) + return 0; + + spin_lock(&vq->kick_lock); + if (vq->kickfd) + eventfd_ctx_put(vq->kickfd); + vq->kickfd = ctx; + if (vq->ready && vq->kicked && vq->kickfd) { + eventfd_signal(vq->kickfd, 1); + vq->kicked = false; + } + spin_unlock(&vq->kick_lock); + + return 0; +} + +static bool vduse_dev_is_ready(struct vduse_dev *dev) +{ + int i; + + for (i = 0; i < dev->vq_num; i++) + if (!dev->vqs[i].num_max) + return false; + + return true; +} + +static void vduse_dev_irq_inject(struct work_struct *work) +{ + struct vduse_dev *dev = container_of(work, struct vduse_dev, inject); + + spin_lock_irq(&dev->irq_lock); + if (dev->config_cb.callback) + dev->config_cb.callback(dev->config_cb.private); + spin_unlock_irq(&dev->irq_lock); +} + +static void vduse_vq_irq_inject(struct work_struct *work) +{ + struct vduse_virtqueue *vq = container_of(work, + struct vduse_virtqueue, inject); + + spin_lock_irq(&vq->irq_lock); + if (vq->ready && vq->cb.callback) + vq->cb.callback(vq->cb.private); + spin_unlock_irq(&vq->irq_lock); +} + +static long vduse_dev_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct vduse_dev *dev = file->private_data; + void __user *argp = (void __user *)arg; + int ret; + + if (unlikely(dev->broken)) + return -EPERM; + + switch (cmd) { + case VDUSE_IOTLB_GET_FD: { + struct vduse_iotlb_entry entry; + struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; + struct vduse_iova_domain *domain = dev->domain; + struct file *f = NULL; + + ret = -EFAULT; + if (copy_from_user(&entry, argp, sizeof(entry))) + break; + + ret = -EINVAL; + if (entry.start > entry.last) + break; + + spin_lock(&domain->iotlb_lock); + map = vhost_iotlb_itree_first(domain->iotlb, + entry.start, entry.last); + if (map) { + map_file = (struct vdpa_map_file *)map->opaque; + f = get_file(map_file->file); + entry.offset = map_file->offset; + entry.start = map->start; + entry.last = map->last; + entry.perm = map->perm; + } + spin_unlock(&domain->iotlb_lock); + ret = -EINVAL; + if (!f) + break; + + ret = -EFAULT; + if (copy_to_user(argp, &entry, sizeof(entry))) { + fput(f); + break; + } + ret = receive_fd(f, perm_to_file_flags(entry.perm)); + fput(f); + break; + } + case VDUSE_DEV_GET_FEATURES: + /* + * Just mirror what driver wrote here. + * The driver is expected to check FEATURE_OK later. + */ + ret = put_user(dev->driver_features, (u64 __user *)argp); + break; + case VDUSE_DEV_SET_CONFIG: { + struct vduse_config_data config; + unsigned long size = offsetof(struct vduse_config_data, + buffer); + + ret = -EFAULT; + if (copy_from_user(&config, argp, size)) + break; + + ret = -EINVAL; + if (config.length == 0 || + config.length > dev->config_size - config.offset) + break; + + ret = -EFAULT; + if (copy_from_user(dev->config + config.offset, argp + size, + config.length)) + break; + + ret = 0; + break; + } + case VDUSE_DEV_INJECT_CONFIG_IRQ: + ret = 0; + queue_work(vduse_irq_wq, &dev->inject); + break; + case VDUSE_VQ_SETUP: { + struct vduse_vq_config config; + u32 index; + + ret = -EFAULT; + if (copy_from_user(&config, argp, sizeof(config))) + break; + + ret = -EINVAL; + if (config.index >= dev->vq_num) + break; + + if (!is_mem_zero((const char *)config.reserved, + sizeof(config.reserved))) + break; + + index = array_index_nospec(config.index, dev->vq_num); + dev->vqs[index].num_max = config.max_size; + ret = 0; + break; + } + case VDUSE_VQ_GET_INFO: { + struct vduse_vq_info vq_info; + struct vduse_virtqueue *vq; + u32 index; + + ret = -EFAULT; + if (copy_from_user(&vq_info, argp, sizeof(vq_info))) + break; + + ret = -EINVAL; + if (vq_info.index >= dev->vq_num) + break; + + index = array_index_nospec(vq_info.index, dev->vq_num); + vq = &dev->vqs[index]; + vq_info.desc_addr = vq->desc_addr; + vq_info.driver_addr = vq->driver_addr; + vq_info.device_addr = vq->device_addr; + vq_info.num = vq->num; + + if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { + vq_info.packed.last_avail_counter = + vq->state.packed.last_avail_counter; + vq_info.packed.last_avail_idx = + vq->state.packed.last_avail_idx; + vq_info.packed.last_used_counter = + vq->state.packed.last_used_counter; + vq_info.packed.last_used_idx = + vq->state.packed.last_used_idx; + } else + vq_info.split.avail_index = + vq->state.split.avail_index; + + vq_info.ready = vq->ready; + + ret = -EFAULT; + if (copy_to_user(argp, &vq_info, sizeof(vq_info))) + break; + + ret = 0; + break; + } + case VDUSE_VQ_SETUP_KICKFD: { + struct vduse_vq_eventfd eventfd; + + ret = -EFAULT; + if (copy_from_user(&eventfd, argp, sizeof(eventfd))) + break; + + ret = vduse_kickfd_setup(dev, &eventfd); + break; + } + case VDUSE_VQ_INJECT_IRQ: { + u32 index; + + ret = -EFAULT; + if (get_user(index, (u32 __user *)argp)) + break; + + ret = -EINVAL; + if (index >= dev->vq_num) + break; + + ret = 0; + index = array_index_nospec(index, dev->vq_num); + queue_work(vduse_irq_wq, &dev->vqs[index].inject); + break; + } + default: + ret = -ENOIOCTLCMD; + break; + } + + return ret; +} + +static int vduse_dev_release(struct inode *inode, struct file *file) +{ + struct vduse_dev *dev = file->private_data; + + spin_lock(&dev->msg_lock); + /* Make sure the inflight messages can processed after reconncection */ + list_splice_init(&dev->recv_list, &dev->send_list); + spin_unlock(&dev->msg_lock); + dev->connected = false; + + return 0; +} + +static struct vduse_dev *vduse_dev_get_from_minor(int minor) +{ + struct vduse_dev *dev; + + mutex_lock(&vduse_lock); + dev = idr_find(&vduse_idr, minor); + mutex_unlock(&vduse_lock); + + return dev; +} + +static int vduse_dev_open(struct inode *inode, struct file *file) +{ + int ret; + struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode)); + + if (!dev) + return -ENODEV; + + ret = -EBUSY; + mutex_lock(&dev->lock); + if (dev->connected) + goto unlock; + + ret = 0; + dev->connected = true; + file->private_data = dev; +unlock: + mutex_unlock(&dev->lock); + + return ret; +} + +static const struct file_operations vduse_dev_fops = { + .owner = THIS_MODULE, + .open = vduse_dev_open, + .release = vduse_dev_release, + .read_iter = vduse_dev_read_iter, + .write_iter = vduse_dev_write_iter, + .poll = vduse_dev_poll, + .unlocked_ioctl = vduse_dev_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .llseek = noop_llseek, +}; + +static struct vduse_dev *vduse_dev_create(void) +{ + struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); + + if (!dev) + return NULL; + + mutex_init(&dev->lock); + spin_lock_init(&dev->msg_lock); + INIT_LIST_HEAD(&dev->send_list); + INIT_LIST_HEAD(&dev->recv_list); + spin_lock_init(&dev->irq_lock); + + INIT_WORK(&dev->inject, vduse_dev_irq_inject); + init_waitqueue_head(&dev->waitq); + + return dev; +} + +static void vduse_dev_destroy(struct vduse_dev *dev) +{ + kfree(dev); +} + +static struct vduse_dev *vduse_find_dev(const char *name) +{ + struct vduse_dev *dev; + int id; + + idr_for_each_entry(&vduse_idr, dev, id) + if (!strcmp(dev->name, name)) + return dev; + + return NULL; +} + +static int vduse_destroy_dev(char *name) +{ + struct vduse_dev *dev = vduse_find_dev(name); + + if (!dev) + return -EINVAL; + + mutex_lock(&dev->lock); + if (dev->vdev || dev->connected) { + mutex_unlock(&dev->lock); + return -EBUSY; + } + dev->connected = true; + mutex_unlock(&dev->lock); + + vduse_dev_reset(dev); + device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); + idr_remove(&vduse_idr, dev->minor); + kvfree(dev->config); + kfree(dev->vqs); + vduse_domain_destroy(dev->domain); + kfree(dev->name); + vduse_dev_destroy(dev); + module_put(THIS_MODULE); + + return 0; +} + +static bool device_is_allowed(u32 device_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++) + if (allowed_device_id[i] == device_id) + return true; + + return false; +} + +static bool features_is_valid(u64 features) +{ + if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) + return false; + + /* Now we only support read-only configuration space */ + if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE)) + return false; + + return true; +} + +static bool vduse_validate_config(struct vduse_dev_config *config) +{ + if (!is_mem_zero((const char *)config->reserved, + sizeof(config->reserved))) + return false; + + if (config->vq_align > PAGE_SIZE) + return false; + + if (config->config_size > PAGE_SIZE) + return false; + + if (!device_is_allowed(config->device_id)) + return false; + + if (!features_is_valid(config->features)) + return false; + + return true; +} + +static ssize_t msg_timeout_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct vduse_dev *dev = dev_get_drvdata(device); + + return sysfs_emit(buf, "%u\n", dev->msg_timeout); +} + +static ssize_t msg_timeout_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct vduse_dev *dev = dev_get_drvdata(device); + int ret; + + ret = kstrtouint(buf, 10, &dev->msg_timeout); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR_RW(msg_timeout); + +static struct attribute *vduse_dev_attrs[] = { + &dev_attr_msg_timeout.attr, + NULL +}; + +ATTRIBUTE_GROUPS(vduse_dev); + +static int vduse_create_dev(struct vduse_dev_config *config, + void *config_buf, u64 api_version) +{ + int i, ret; + struct vduse_dev *dev; + + ret = -EEXIST; + if (vduse_find_dev(config->name)) + goto err; + + ret = -ENOMEM; + dev = vduse_dev_create(); + if (!dev) + goto err; + + dev->api_version = api_version; + dev->device_features = config->features; + dev->device_id = config->device_id; + dev->vendor_id = config->vendor_id; + dev->name = kstrdup(config->name, GFP_KERNEL); + if (!dev->name) + goto err_str; + + dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1, + VDUSE_BOUNCE_SIZE); + if (!dev->domain) + goto err_domain; + + dev->config = config_buf; + dev->config_size = config->config_size; + dev->vq_align = config->vq_align; + dev->vq_num = config->vq_num; + dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); + if (!dev->vqs) + goto err_vqs; + + for (i = 0; i < dev->vq_num; i++) { + dev->vqs[i].index = i; + INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject); + INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work); + spin_lock_init(&dev->vqs[i].kick_lock); + spin_lock_init(&dev->vqs[i].irq_lock); + } + + ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); + if (ret < 0) + goto err_idr; + + dev->minor = ret; + dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; + dev->dev = device_create(vduse_class, NULL, + MKDEV(MAJOR(vduse_major), dev->minor), + dev, "%s", config->name); + if (IS_ERR(dev->dev)) { + ret = PTR_ERR(dev->dev); + goto err_dev; + } + __module_get(THIS_MODULE); + + return 0; +err_dev: + idr_remove(&vduse_idr, dev->minor); +err_idr: + kfree(dev->vqs); +err_vqs: + vduse_domain_destroy(dev->domain); +err_domain: + kfree(dev->name); +err_str: + vduse_dev_destroy(dev); +err: + kvfree(config_buf); + return ret; +} + +static long vduse_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret; + void __user *argp = (void __user *)arg; + struct vduse_control *control = file->private_data; + + mutex_lock(&vduse_lock); + switch (cmd) { + case VDUSE_GET_API_VERSION: + ret = put_user(control->api_version, (u64 __user *)argp); + break; + case VDUSE_SET_API_VERSION: { + u64 api_version; + + ret = -EFAULT; + if (get_user(api_version, (u64 __user *)argp)) + break; + + ret = -EINVAL; + if (api_version > VDUSE_API_VERSION) + break; + + ret = 0; + control->api_version = api_version; + break; + } + case VDUSE_CREATE_DEV: { + struct vduse_dev_config config; + unsigned long size = offsetof(struct vduse_dev_config, config); + void *buf; + + ret = -EFAULT; + if (copy_from_user(&config, argp, size)) + break; + + ret = -EINVAL; + if (vduse_validate_config(&config) == false) + break; + + buf = vmemdup_user(argp + size, config.config_size); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + break; + } + config.name[VDUSE_NAME_MAX - 1] = '\0'; + ret = vduse_create_dev(&config, buf, control->api_version); + break; + } + case VDUSE_DESTROY_DEV: { + char name[VDUSE_NAME_MAX]; + + ret = -EFAULT; + if (copy_from_user(name, argp, VDUSE_NAME_MAX)) + break; + + name[VDUSE_NAME_MAX - 1] = '\0'; + ret = vduse_destroy_dev(name); + break; + } + default: + ret = -EINVAL; + break; + } + mutex_unlock(&vduse_lock); + + return ret; +} + +static int vduse_release(struct inode *inode, struct file *file) +{ + struct vduse_control *control = file->private_data; + + kfree(control); + return 0; +} + +static int vduse_open(struct inode *inode, struct file *file) +{ + struct vduse_control *control; + + control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL); + if (!control) + return -ENOMEM; + + control->api_version = VDUSE_API_VERSION; + file->private_data = control; + + return 0; +} + +static const struct file_operations vduse_ctrl_fops = { + .owner = THIS_MODULE, + .open = vduse_open, + .release = vduse_release, + .unlocked_ioctl = vduse_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .llseek = noop_llseek, +}; + +static char *vduse_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); +} + +static void vduse_mgmtdev_release(struct device *dev) +{ +} + +static struct device vduse_mgmtdev = { + .init_name = "vduse", + .release = vduse_mgmtdev_release, +}; + +static struct vdpa_mgmt_dev mgmt_dev; + +static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) +{ + struct vduse_vdpa *vdev; + int ret; + + if (dev->vdev) + return -EEXIST; + + vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev, + &vduse_vdpa_config_ops, name, true); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + dev->vdev = vdev; + vdev->dev = dev; + vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask; + ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64)); + if (ret) { + put_device(&vdev->vdpa.dev); + return ret; + } + set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); + vdev->vdpa.dma_dev = &vdev->vdpa.dev; + vdev->vdpa.mdev = &mgmt_dev; + + return 0; +} + +static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +{ + struct vduse_dev *dev; + int ret; + + mutex_lock(&vduse_lock); + dev = vduse_find_dev(name); + if (!dev || !vduse_dev_is_ready(dev)) { + mutex_unlock(&vduse_lock); + return -EINVAL; + } + ret = vduse_dev_init_vdpa(dev, name); + mutex_unlock(&vduse_lock); + if (ret) + return ret; + + ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num); + if (ret) { + put_device(&dev->vdev->vdpa.dev); + return ret; + } + + return 0; +} + +static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) +{ + _vdpa_unregister_device(dev); +} + +static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = { + .dev_add = vdpa_dev_add, + .dev_del = vdpa_dev_del, +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct vdpa_mgmt_dev mgmt_dev = { + .device = &vduse_mgmtdev, + .id_table = id_table, + .ops = &vdpa_dev_mgmtdev_ops, +}; + +static int vduse_mgmtdev_init(void) +{ + int ret; + + ret = device_register(&vduse_mgmtdev); + if (ret) + return ret; + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) + goto err; + + return 0; +err: + device_unregister(&vduse_mgmtdev); + return ret; +} + +static void vduse_mgmtdev_exit(void) +{ + vdpa_mgmtdev_unregister(&mgmt_dev); + device_unregister(&vduse_mgmtdev); +} + +static int vduse_init(void) +{ + int ret; + struct device *dev; + + vduse_class = class_create(THIS_MODULE, "vduse"); + if (IS_ERR(vduse_class)) + return PTR_ERR(vduse_class); + + vduse_class->devnode = vduse_devnode; + vduse_class->dev_groups = vduse_dev_groups; + + ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); + if (ret) + goto err_chardev_region; + + /* /dev/vduse/control */ + cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops); + vduse_ctrl_cdev.owner = THIS_MODULE; + ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1); + if (ret) + goto err_ctrl_cdev; + + dev = device_create(vduse_class, NULL, vduse_major, NULL, "control"); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto err_device; + } + + /* /dev/vduse/$DEVICE */ + cdev_init(&vduse_cdev, &vduse_dev_fops); + vduse_cdev.owner = THIS_MODULE; + ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1), + VDUSE_DEV_MAX - 1); + if (ret) + goto err_cdev; + + vduse_irq_wq = alloc_workqueue("vduse-irq", + WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0); + if (!vduse_irq_wq) + goto err_wq; + + ret = vduse_domain_init(); + if (ret) + goto err_domain; + + ret = vduse_mgmtdev_init(); + if (ret) + goto err_mgmtdev; + + return 0; +err_mgmtdev: + vduse_domain_exit(); +err_domain: + destroy_workqueue(vduse_irq_wq); +err_wq: + cdev_del(&vduse_cdev); +err_cdev: + device_destroy(vduse_class, vduse_major); +err_device: + cdev_del(&vduse_ctrl_cdev); +err_ctrl_cdev: + unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); +err_chardev_region: + class_destroy(vduse_class); + return ret; +} +module_init(vduse_init); + +static void vduse_exit(void) +{ + vduse_mgmtdev_exit(); + vduse_domain_exit(); + destroy_workqueue(vduse_irq_wq); + cdev_del(&vduse_cdev); + device_destroy(vduse_class, vduse_major); + cdev_del(&vduse_ctrl_cdev); + unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); + class_destroy(vduse_class); +} +module_exit(vduse_exit); + +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index fe0527329857..5bcd00246d2e 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -189,10 +189,20 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) } vp_modern_set_status(mdev, status); +} - if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) && - (s & VIRTIO_CONFIG_S_DRIVER_OK)) +static int vp_vdpa_reset(struct vdpa_device *vdpa) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; + u8 s = vp_vdpa_get_status(vdpa); + + vp_modern_set_status(mdev, 0); + + if (s & VIRTIO_CONFIG_S_DRIVER_OK) vp_vdpa_free_irq(vp_vdpa); + + return 0; } static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa) @@ -398,6 +408,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = { .set_features = vp_vdpa_set_features, .get_status = vp_vdpa_get_status, .set_status = vp_vdpa_set_status, + .reset = vp_vdpa_reset, .get_vq_num_max = vp_vdpa_get_vq_num_max, .get_vq_state = vp_vdpa_get_vq_state, .get_vq_notification = vp_vdpa_get_vq_notification, @@ -435,7 +446,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, NULL); + dev, &vp_vdpa_ops, NULL, false); if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); return PTR_ERR(vp_vdpa); diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 0582079e4bcc..670d56c879e5 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -36,19 +36,21 @@ void vhost_iotlb_map_free(struct vhost_iotlb *iotlb, EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); /** - * vhost_iotlb_add_range - add a new range to vhost IOTLB + * vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB * @iotlb: the IOTLB * @start: start of the IOVA range * @last: last of IOVA range * @addr: the address that is mapped to @start * @perm: access permission of this range + * @opaque: the opaque pointer for the new mapping * * Returns an error last is smaller than start or memory allocation * fails */ -int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, - u64 start, u64 last, - u64 addr, unsigned int perm) +int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, + u64 start, u64 last, + u64 addr, unsigned int perm, + void *opaque) { struct vhost_iotlb_map *map; @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, map->last = last; map->addr = addr; map->perm = perm; + map->opaque = opaque; iotlb->nmaps++; vhost_iotlb_itree_insert(map, &iotlb->root); @@ -80,6 +83,15 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, return 0; } +EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx); + +int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, + u64 start, u64 last, + u64 addr, unsigned int perm) +{ + return vhost_iotlb_add_range_ctx(iotlb, start, last, + addr, perm, NULL); +} EXPORT_SYMBOL_GPL(vhost_iotlb_add_range); /** diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 3a249ee7e144..28ef323882fb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -467,7 +467,7 @@ static void vhost_tx_batch(struct vhost_net *net, .num = nvq->batched_xdp, .ptr = nvq->xdp, }; - int err; + int i, err; if (nvq->batched_xdp == 0) goto signal_used; @@ -476,6 +476,15 @@ static void vhost_tx_batch(struct vhost_net *net, err = sock->ops->sendmsg(sock, msghdr, 0); if (unlikely(err < 0)) { vq_err(&nvq->vq, "Fail to batch sending packets\n"); + + /* free pages owned by XDP; since this is an unlikely error path, + * keep it simple and avoid more complex bulk update for the + * used pages + */ + for (i = 0; i < nvq->batched_xdp; ++i) + put_page(virt_to_head_page(nvq->xdp[i].data)); + nvq->batched_xdp = 0; + nvq->done_idx = 0; return; } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 46f897e41217..532e204f2b1b 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1,24 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ /******************************************************************************* * Vhost kernel TCM fabric driver for virtio SCSI initiators * * (C) Copyright 2010-2013 Datera, Inc. * (C) Copyright 2010-2012 IBM Corp. * - * Licensed to the Linux Foundation under the General Public License (GPL) version 2. - * * Authors: Nicholas A. Bellinger <nab@daterainc.com> * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * ****************************************************************************/ #include <linux/module.h> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 9479f7f79217..f41d081777f5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -116,12 +116,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); } -static void vhost_vdpa_reset(struct vhost_vdpa *v) +static int vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; - vdpa_reset(vdpa); v->in_batch = 0; + + return vdpa_reset(vdpa); } static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) @@ -157,7 +158,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; u8 status, status_old; - int nvqs = v->nvqs; + int ret, nvqs = v->nvqs; u16 i; if (copy_from_user(&status, statusp, sizeof(status))) @@ -172,7 +173,12 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) return -EINVAL; - ops->set_status(vdpa, status); + if (status == 0) { + ret = ops->reset(vdpa); + if (ret) + return ret; + } else + ops->set_status(vdpa, status); if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) for (i = 0; i < nvqs; i++) @@ -498,7 +504,7 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, return r; } -static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last) { struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; @@ -507,19 +513,44 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) unsigned long pfn, pinned; while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { - pinned = map->size >> PAGE_SHIFT; - for (pfn = map->addr >> PAGE_SHIFT; + pinned = PFN_DOWN(map->size); + for (pfn = PFN_DOWN(map->addr); pinned > 0; pfn++, pinned--) { page = pfn_to_page(pfn); if (map->perm & VHOST_ACCESS_WO) set_page_dirty_lock(page); unpin_user_page(page); } - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); + atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); vhost_iotlb_map_free(iotlb, map); } } +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last) +{ + struct vhost_dev *dev = &v->vdev; + struct vhost_iotlb *iotlb = dev->iotlb; + struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; + + while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + vhost_iotlb_map_free(iotlb, map); + } +} + +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (vdpa->use_va) + return vhost_vdpa_va_unmap(v, start, last); + + return vhost_vdpa_pa_unmap(v, start, last); +} + static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) { struct vhost_dev *dev = &v->vdev; @@ -551,21 +582,21 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, - u64 iova, u64 size, u64 pa, u32 perm) +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, + u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; int r = 0; - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, - pa, perm); + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, + pa, perm, opaque); if (r) return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, iova, size, pa, perm); + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) r = ops->set_map(vdpa, dev->iotlb); @@ -573,13 +604,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); } - - if (r) + if (r) { vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); - else - atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); + return r; + } - return r; + if (!vdpa->use_va) + atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm); + + return 0; } static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) @@ -600,38 +633,78 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) } } -static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, - struct vhost_iotlb_msg *msg) +static int vhost_vdpa_va_map(struct vhost_vdpa *v, + u64 iova, u64 size, u64 uaddr, u32 perm) +{ + struct vhost_dev *dev = &v->vdev; + u64 offset, map_size, map_iova = iova; + struct vdpa_map_file *map_file; + struct vm_area_struct *vma; + int ret; + + mmap_read_lock(dev->mm); + + while (size) { + vma = find_vma(dev->mm, uaddr); + if (!vma) { + ret = -EINVAL; + break; + } + map_size = min(size, vma->vm_end - uaddr); + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && + !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) + goto next; + + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); + if (!map_file) { + ret = -ENOMEM; + break; + } + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; + map_file->offset = offset; + map_file->file = get_file(vma->vm_file); + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, + perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + break; + } +next: + size -= map_size; + uaddr += map_size; + map_iova += map_size; + } + if (ret) + vhost_vdpa_unmap(v, iova, map_iova - iova); + + mmap_read_unlock(dev->mm); + + return ret; +} + +static int vhost_vdpa_pa_map(struct vhost_vdpa *v, + u64 iova, u64 size, u64 uaddr, u32 perm) { struct vhost_dev *dev = &v->vdev; - struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; unsigned long list_size = PAGE_SIZE / sizeof(struct page *); unsigned int gup_flags = FOLL_LONGTERM; unsigned long npages, cur_base, map_pfn, last_pfn = 0; unsigned long lock_limit, sz2pin, nchunks, i; - u64 iova = msg->iova; + u64 start = iova; long pinned; int ret = 0; - if (msg->iova < v->range.first || !msg->size || - msg->iova > U64_MAX - msg->size + 1 || - msg->iova + msg->size - 1 > v->range.last) - return -EINVAL; - - if (vhost_iotlb_itree_first(iotlb, msg->iova, - msg->iova + msg->size - 1)) - return -EEXIST; - /* Limit the use of memory for bookkeeping */ page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) + if (perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; - npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; + npages = PFN_UP(size + (iova & ~PAGE_MASK)); if (!npages) { ret = -EINVAL; goto free; @@ -639,13 +712,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, mmap_read_lock(dev->mm); - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK)); if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; + cur_base = uaddr & PAGE_MASK; iova &= PAGE_MASK; nchunks = 0; @@ -673,10 +746,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (last_pfn && (this_pfn != last_pfn + 1)) { /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; + csize = PFN_PHYS(last_pfn - map_pfn + 1); ret = vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm); + PFN_PHYS(map_pfn), + perm, NULL); if (ret) { /* * Unpin the pages that are left unmapped @@ -699,13 +772,13 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, last_pfn = this_pfn; } - cur_base += pinned << PAGE_SHIFT; + cur_base += PFN_PHYS(pinned); npages -= pinned; } /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1), + PFN_PHYS(map_pfn), perm, NULL); out: if (ret) { if (nchunks) { @@ -724,13 +797,38 @@ out: for (pfn = map_pfn; pfn <= last_pfn; pfn++) unpin_user_page(pfn_to_page(pfn)); } - vhost_vdpa_unmap(v, msg->iova, msg->size); + vhost_vdpa_unmap(v, start, size); } unlock: mmap_read_unlock(dev->mm); free: free_page((unsigned long)page_list); return ret; + +} + +static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, + struct vhost_iotlb_msg *msg) +{ + struct vhost_dev *dev = &v->vdev; + struct vdpa_device *vdpa = v->vdpa; + struct vhost_iotlb *iotlb = dev->iotlb; + + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || + msg->iova + msg->size - 1 > v->range.last) + return -EINVAL; + + if (vhost_iotlb_itree_first(iotlb, msg->iova, + msg->iova + msg->size - 1)) + return -EEXIST; + + if (vdpa->use_va) + return vhost_vdpa_va_map(v, msg->iova, msg->size, + msg->uaddr, msg->perm); + + return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, + msg->perm); } static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, @@ -860,7 +958,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return -EBUSY; nvqs = v->nvqs; - vhost_vdpa_reset(v); + r = vhost_vdpa_reset(v); + if (r) + goto err; vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); if (!vqs) { @@ -945,7 +1045,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - notify.addr >> PAGE_SHIFT, PAGE_SIZE, + PFN_DOWN(notify.addr), PAGE_SIZE, vma->vm_page_prot)) return VM_FAULT_SIGBUS; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index f249622ef11b..938aefbc75ec 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -114,7 +114,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, size_t nbytes; size_t iov_len, payload_len; int head; - bool restore_flag = false; + u32 flags_to_restore = 0; spin_lock_bh(&vsock->send_pkt_list_lock); if (list_empty(&vsock->send_pkt_list)) { @@ -178,16 +178,21 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * small rx buffers, headers of packets in rx queue are * created dynamically and are initialized with header * of current packet(except length). But in case of - * SOCK_SEQPACKET, we also must clear record delimeter - * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one - * packet with delimeter(which marks end of record), - * there will be sequence of packets with delimeter - * bit set. After initialized header will be copied to - * rx buffer, this bit will be restored. + * SOCK_SEQPACKET, we also must clear message delimeter + * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit + * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise, + * there will be sequence of packets with these + * bits set. After initialized header will be copied to + * rx buffer, these required bits will be restored. */ - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { - pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - restore_flag = true; + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { + pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM; + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { + pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR; + } } } @@ -224,8 +229,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, * to send it with the next available buffer. */ if (pkt->off < pkt->len) { - if (restore_flag) - pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + pkt->hdr.flags |= cpu_to_le32(flags_to_restore); /* We are queueing the same virtio_vsock_pkt to handle * the remaining bytes, and we want to deliver it diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index d33c5cd684c0..b26b79dfcac9 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -582,7 +582,9 @@ config FB_HP300 config FB_TGA tristate "TGA/SFB+ framebuffer support" - depends on FB && (ALPHA || TC) + depends on FB + depends on PCI || TC + depends on ALPHA || TC select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 71fb710f1ce3..7420d2c16e47 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -962,6 +962,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) struct fb_var_screeninfo old_var; struct fb_videomode mode; struct fb_event event; + u32 unused; if (var->activate & FB_ACTIVATE_INV_MODE) { struct fb_videomode mode1, mode2; @@ -1008,6 +1009,11 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (var->xres < 8 || var->yres < 8) return -EINVAL; + /* Too huge resolution causes multiplication overflow. */ + if (check_mul_overflow(var->xres, var->yres, &unused) || + check_mul_overflow(var->xres_virtual, var->yres_virtual, &unused)) + return -EINVAL; + ret = info->fbops->fb_check_var(var, info); if (ret) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 1ea0c1f6a1fd..588e02fb91d3 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -4,6 +4,7 @@ #include <linux/virtio_config.h> #include <linux/module.h> #include <linux/idr.h> +#include <linux/of.h> #include <uapi/linux/virtio_ids.h> /* Unique numbering for virtio devices. */ @@ -292,6 +293,8 @@ static void virtio_dev_remove(struct device *_d) /* Acknowledge the device's existence again. */ virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); + + of_node_put(dev->dev.of_node); } static struct bus_type virtio_bus = { @@ -318,6 +321,43 @@ void unregister_virtio_driver(struct virtio_driver *driver) } EXPORT_SYMBOL_GPL(unregister_virtio_driver); +static int virtio_device_of_init(struct virtio_device *dev) +{ + struct device_node *np, *pnode = dev_of_node(dev->dev.parent); + char compat[] = "virtio,deviceXXXXXXXX"; + int ret, count; + + if (!pnode) + return 0; + + count = of_get_available_child_count(pnode); + if (!count) + return 0; + + /* There can be only 1 child node */ + if (WARN_ON(count > 1)) + return -EINVAL; + + np = of_get_next_available_child(pnode, NULL); + if (WARN_ON(!np)) + return -ENODEV; + + ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device); + BUG_ON(ret >= sizeof(compat)); + + if (!of_device_is_compatible(np, compat)) { + ret = -EINVAL; + goto out; + } + + dev->dev.of_node = np; + return 0; + +out: + of_node_put(np); + return ret; +} + /** * register_virtio_device - register virtio device * @dev : virtio device to be registered @@ -342,6 +382,10 @@ int register_virtio_device(struct virtio_device *dev) dev->index = err; dev_set_name(&dev->dev, "virtio%u", dev->index); + err = virtio_device_of_init(dev); + if (err) + goto out_ida_remove; + spin_lock_init(&dev->config_lock); dev->config_enabled = false; dev->config_change_pending = false; @@ -362,10 +406,16 @@ int register_virtio_device(struct virtio_device *dev) */ err = device_add(&dev->dev); if (err) - ida_simple_remove(&virtio_index_ida, dev->index); + goto out_of_node_put; + + return 0; + +out_of_node_put: + of_node_put(dev->dev.of_node); +out_ida_remove: + ida_simple_remove(&virtio_index_ida, dev->index); out: - if (err) - virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); + virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; } EXPORT_SYMBOL_GPL(register_virtio_device); diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 47dce91f788c..c22ff0117b46 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -531,8 +531,8 @@ static int init_vqs(struct virtio_balloon *vb) callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack; } - err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, - vqs, callbacks, names, NULL, NULL); + err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs, + callbacks, names, NULL); if (err) return err; diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5f1ce59b44b9..a37eb52fb401 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -214,7 +214,7 @@ config XEN_PVCALLS_FRONTEND implements them. config XEN_PVCALLS_BACKEND - bool "XEN PV Calls backend driver" + tristate "XEN PV Calls backend driver" depends on INET && XEN && XEN_BACKEND help Experimental backend for the Xen PV Calls protocol diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 671c71245a7b..2d2803883306 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -43,6 +43,8 @@ #include <linux/sched.h> #include <linux/cred.h> #include <linux/errno.h> +#include <linux/freezer.h> +#include <linux/kthread.h> #include <linux/mm.h> #include <linux/memblock.h> #include <linux/pagemap.h> @@ -115,7 +117,7 @@ static struct ctl_table xen_root[] = { #define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1) /* - * balloon_process() state: + * balloon_thread() state: * * BP_DONE: done or nothing to do, * BP_WAIT: wait to be rescheduled, @@ -130,6 +132,8 @@ enum bp_state { BP_ECANCELED }; +/* Main waiting point for xen-balloon thread. */ +static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq); static DEFINE_MUTEX(balloon_mutex); @@ -144,10 +148,6 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; static LIST_HEAD(ballooned_pages); static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); -/* Main work function, always executed in process context. */ -static void balloon_process(struct work_struct *work); -static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); - /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ #define GFP_BALLOON \ @@ -366,7 +366,7 @@ static void xen_online_page(struct page *page, unsigned int order) static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { if (val == MEM_ONLINE) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); return NOTIFY_OK; } @@ -491,18 +491,43 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * As this is a work item it is guaranteed to run as a single instance only. + * Stop waiting if either state is not BP_EAGAIN and ballooning action is + * needed, or if the credit has changed while state is BP_EAGAIN. + */ +static bool balloon_thread_cond(enum bp_state state, long credit) +{ + if (state != BP_EAGAIN) + credit = 0; + + return current_credit() != credit || kthread_should_stop(); +} + +/* + * As this is a kthread it is guaranteed to run as a single instance only. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ -static void balloon_process(struct work_struct *work) +static int balloon_thread(void *unused) { enum bp_state state = BP_DONE; long credit; + unsigned long timeout; + + set_freezable(); + for (;;) { + if (state == BP_EAGAIN) + timeout = balloon_stats.schedule_delay * HZ; + else + timeout = 3600 * HZ; + credit = current_credit(); + wait_event_interruptible_timeout(balloon_thread_wq, + balloon_thread_cond(state, credit), timeout); + + if (kthread_should_stop()) + return 0; - do { mutex_lock(&balloon_mutex); credit = current_credit(); @@ -529,12 +554,7 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); cond_resched(); - - } while (credit && state == BP_DONE); - - /* Schedule more work if there is some still to be done. */ - if (state == BP_EAGAIN) - schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); + } } /* Resets the Xen limit, sets new target, and kicks off processing. */ @@ -542,7 +562,7 @@ void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.target_pages = target; - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); } EXPORT_SYMBOL_GPL(balloon_set_new_target); @@ -647,7 +667,7 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) - schedule_delayed_work(&balloon_worker, 0); + wake_up(&balloon_thread_wq); mutex_unlock(&balloon_mutex); } @@ -679,6 +699,8 @@ static void __init balloon_add_region(unsigned long start_pfn, static int __init balloon_init(void) { + struct task_struct *task; + if (!xen_domain()) return -ENODEV; @@ -722,6 +744,12 @@ static int __init balloon_init(void) } #endif + task = kthread_run(balloon_thread, NULL, "xen-balloon"); + if (IS_ERR(task)) { + pr_err("xen-balloon thread could not be started, ballooning will not work!\n"); + return PTR_ERR(task); + } + /* Init the xen-balloon driver. */ xen_balloon_init(); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 643fe440c46e..8c10edf9efe6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -106,27 +106,26 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr) static int xen_swiotlb_fixup(void *buf, unsigned long nslabs) { - int i, rc; - int dma_bits; + int rc; + unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); + unsigned int i, dma_bits = order + PAGE_SHIFT; dma_addr_t dma_handle; phys_addr_t p = virt_to_phys(buf); - dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; + BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1)); + BUG_ON(nslabs % IO_TLB_SEGSIZE); i = 0; do { - int slabs = min(nslabs - i, (unsigned long)IO_TLB_SEGSIZE); - do { rc = xen_create_contiguous_region( - p + (i << IO_TLB_SHIFT), - get_order(slabs << IO_TLB_SHIFT), + p + (i << IO_TLB_SHIFT), order, dma_bits, &dma_handle); } while (rc && dma_bits++ < MAX_DMA_BITS); if (rc) return rc; - i += slabs; + i += IO_TLB_SEGSIZE; } while (i < nslabs); return 0; } @@ -153,9 +152,7 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err) return ""; } -#define DEFAULT_NSLABS ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE) - -int __ref xen_swiotlb_init(void) +int xen_swiotlb_init(void) { enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; unsigned long bytes = swiotlb_size_or_default(); @@ -185,7 +182,7 @@ retry: order--; } if (!start) - goto error; + goto exit; if (order != get_order(bytes)) { pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n", (PAGE_SIZE << order) >> 20); @@ -208,15 +205,15 @@ retry: swiotlb_set_max_segment(PAGE_SIZE); return 0; error: - if (repeat--) { + if (nslabs > 1024 && repeat--) { /* Min is 2MB */ - nslabs = max(1024UL, (nslabs >> 1)); - pr_info("Lowering to %luMB\n", - (nslabs << IO_TLB_SHIFT) >> 20); + nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); + bytes = nslabs << IO_TLB_SHIFT; + pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; } +exit: pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); - free_pages((unsigned long)start, order); return rc; } @@ -244,9 +241,9 @@ retry: rc = xen_swiotlb_fixup(start, nslabs); if (rc) { memblock_free(__pa(start), PAGE_ALIGN(bytes)); - if (repeat--) { + if (nslabs > 1024 && repeat--) { /* Min is 2MB */ - nslabs = max(1024UL, (nslabs >> 1)); + nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); bytes = nslabs << IO_TLB_SHIFT; pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; @@ -254,7 +251,7 @@ retry: panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc); } - if (swiotlb_init_with_tbl(start, nslabs, false)) + if (swiotlb_init_with_tbl(start, nslabs, true)) panic("Cannot allocate SWIOTLB buffer"); swiotlb_set_max_segment(PAGE_SIZE); } diff --git a/fs/Kconfig b/fs/Kconfig index 47af46a573ba..a6313a969bc5 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -367,7 +367,7 @@ source "fs/ceph/Kconfig" source "fs/cifs/Kconfig" source "fs/ksmbd/Kconfig" -config CIFS_COMMON +config SMBFS_COMMON tristate default y if CIFS=y default m if CIFS=m diff --git a/fs/Makefile b/fs/Makefile index 2f21300851ae..84c5e4cdfee5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -17,7 +17,7 @@ obj-y := open.o read_write.o file_table.o super.o \ kernel_read_file.o remap_range.o ifeq ($(CONFIG_BLOCK),y) -obj-y += buffer.o block_dev.o direct-io.o mpage.o +obj-y += buffer.o direct-io.o mpage.o else obj-y += no-block.o endif @@ -96,7 +96,7 @@ obj-$(CONFIG_LOCKD) += lockd/ obj-$(CONFIG_NLS) += nls/ obj-$(CONFIG_UNICODE) += unicode/ obj-$(CONFIG_SYSV_FS) += sysv/ -obj-$(CONFIG_CIFS_COMMON) += cifs_common/ +obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/ obj-$(CONFIG_CIFS) += cifs/ obj-$(CONFIG_SMB_SERVER) += ksmbd/ obj-$(CONFIG_HPFS_FS) += hpfs/ diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c index 8a3b30ec860c..8be57aaedab6 100644 --- a/fs/cifs/cache.c +++ b/fs/cifs/cache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cache.c - CIFS filesystem cache index structure definitions + * CIFS filesystem cache index structure definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 51a824fc926a..de2c12bcfa4b 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs_debug.c * * Copyright (C) International Business Machines Corp., 2000,2005 * diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 4fd788586399..f97407520ea1 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_fs_sb.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index ef723be358af..b87cbbe6d2d4 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_ioctl.h * * Structure definitions for io control for cifs/smb3 * diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 8fa26a8530f8..353bd0dd7026 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifs_spnego.c -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 31387d0ea32e..e6a0451877d4 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifs_spnego.h -- SPNEGO upcall management for CIFS + * SPNEGO upcall management for CIFS * * Copyright (c) 2007 Red Hat, Inc. * Author(s): Jeff Layton (jlayton@redhat.com) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 171ad8b42107..e7582dd79179 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/cifs_unicode.c * * Copyright (c) International Business Machines Corp., 2000,2009 * Modified by Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 388eb536cff1..ee3aab3dd4ac 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsacl.c * * Copyright (C) International Business Machines Corp., 2007,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index f8292bcf8594..ccbfc754bd3c 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsacl.h * * Copyright (c) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 6679e07e533e..d118282071b3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsencrypt.c * * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP * for more detailed information @@ -22,7 +21,7 @@ #include <linux/random.h> #include <linux/highmem.h> #include <linux/fips.h> -#include "../cifs_common/arc4.h" +#include "../smbfs_common/arc4.h" #include <crypto/aead.h> int __cifs_calc_signature(struct smb_rqst *rqst, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8c20bfa187ac..9fa930dfd78d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifsfs.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d25a4099b32e..b50da1901ebd 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsfs.h * * Copyright (c) International Business Machines Corp., 2002, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c068f7d8d879..e916470468ea 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsglob.h * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) @@ -1400,6 +1399,7 @@ struct cifsInodeInfo { #define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */ #define CIFS_INO_LOCK (5) /* lock bit for synchronization */ #define CIFS_INO_MODIFIED_ATTR (6) /* Indicate change in mtime/ctime */ +#define CIFS_INO_CLOSE_ON_LOCK (7) /* Not to defer the close when lock is set */ unsigned long flags; spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index dc920e206336..d2ff438fd31f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifspdu.h * * Copyright (c) International Business Machines Corp., 2002,2009 * Author(s): Steve French (sfrench@us.ibm.com) @@ -12,7 +11,7 @@ #include <net/sock.h> #include <asm/unaligned.h> -#include "smbfsctl.h" +#include "../smbfs_common/smbfsctl.h" #define CIFS_PROT 0 #define POSIX_PROT (CIFS_PROT+1) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f9740c21ca3d..d0f85b666662 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/cifsproto.h * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) @@ -268,6 +267,9 @@ extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode); extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon); +extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon, + const char *path); + extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx); extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a8e41c1e80ca..243d17696f06 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/cifssmb.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0db344807ef1..7881115cfbee 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/connect.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Author(s): Steve French (sfrench@us.ibm.com) @@ -1090,7 +1089,7 @@ next_pdu: module_put_and_exit(0); } -/** +/* * Returns true if srcaddr isn't specified and rhs isn't specified, or * if srcaddr is specified and matches the IP address of the rhs argument */ @@ -1550,6 +1549,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) /** * cifs_setup_ipc - helper to setup the IPC tcon for the session + * @ses: smb session to issue the request on + * @ctx: the superblock configuration context to use for building the + * new tree connection for the IPC (interprocess communication RPC) * * A new IPC connection is made and stored in the session * tcon_ipc. The IPC tcon has the same lifetime as the session. @@ -1605,6 +1607,7 @@ out: /** * cifs_free_ipc - helper to release the session IPC tcon + * @ses: smb session to unmount the IPC from * * Needs to be called everytime a session is destroyed. * @@ -1855,6 +1858,8 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), /** * cifs_get_smb_ses - get a session matching @ctx data from @server + * @server: server to setup the session to + * @ctx: superblock configuration context to use to setup the session * * This function assumes it is being called from cifs_mount() where we * already got a server reference (server refcount +1). See @@ -2065,6 +2070,8 @@ cifs_put_tcon(struct cifs_tcon *tcon) /** * cifs_get_tcon - get a tcon matching @ctx data from @ses + * @ses: smb session to issue the request on + * @ctx: the superblock configuration context to use for building the * * - tcon refcount is the number of mount points using the tcon. * - ses refcount is the number of tcon using the session. @@ -3030,7 +3037,7 @@ build_unc_path_to_root(const struct smb3_fs_context *ctx, return full_path; } -/** +/* * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb * * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5f8a302ffcb2..6e8e7cc26ae2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dir.c * * vfs operations that deal with dentries * diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 8c616aaeb7c4..0458d28d71aa 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/dns_resolve.c * * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 9fa2807ef79e..afc0df381246 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/dns_resolve.h -- DNS Resolver upcall management for CIFS DFS - * Handles host name to IP address resolution + * DNS Resolver upcall management for CIFS DFS + * Handles host name to IP address resolution * * Copyright (c) International Business Machines Corp., 2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/export.c b/fs/cifs/export.c index 747a540db954..37c28415df1e 100644 --- a/fs/cifs/export.c +++ b/fs/cifs/export.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/export.c * * Copyright (C) International Business Machines Corp., 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d0216472f1c6..6796fc73b304 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/file.c * * vfs operations that deal with files * @@ -883,6 +882,7 @@ int cifs_close(struct inode *inode, struct file *file) dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && cinode->lease_granted && + !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { inode->i_ctime = inode->i_mtime = current_time(inode); @@ -1865,6 +1865,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); cifs_sb = CIFS_FILE_SB(file); + set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index fab47fa7df74..8eedd20c44ab 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/fscache.c - CIFS filesystem cache interface + * CIFS filesystem cache interface * * Copyright (c) 2010 Novell, Inc. * Author(s): Suresh Jayaraman <sjayaraman@suse.de> diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 82e856b9cf89..9baa1d0f22bd 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/fscache.h - CIFS filesystem cache interface definitions + * CIFS filesystem cache interface definitions * * Copyright (c) 2010 Novell, Inc. * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 50c01cff4c84..82848412ad85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/inode.c * * Copyright (C) International Business Machines Corp., 2002,2010 * Author(s): Steve French (sfrench@us.ibm.com) @@ -1625,7 +1624,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - cifs_close_deferred_file(CIFS_I(inode)); + cifs_close_deferred_file_under_dentry(tcon, full_path); if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, @@ -2114,9 +2113,9 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, goto cifs_rename_exit; } - cifs_close_deferred_file(CIFS_I(d_inode(source_dentry))); + cifs_close_deferred_file_under_dentry(tcon, from_name); if (d_inode(target_dentry) != NULL) - cifs_close_deferred_file(CIFS_I(d_inode(target_dentry))); + cifs_close_deferred_file_under_dentry(tcon, to_name); rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 42c6a0bac6c8..0359b604bdbc 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/ioctl.c * * vfs operations that deal with io control * @@ -359,7 +358,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) if (pSMBFile == NULL) break; tcon = tlink_tcon(pSMBFile->tlink); - caps = le64_to_cpu(tcon->fsUnixInfo.Capability); + /* caps = le64_to_cpu(tcon->fsUnixInfo.Capability); */ if (get_user(ExtAttrBits, (int __user *)arg)) { rc = -EFAULT; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index f0a6d63bc08c..852e54ee82c2 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/link.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 9469f1cf0b46..03da00eb7c04 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/misc.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) @@ -736,7 +735,7 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) if (cancel_delayed_work(&cfile->deferred)) { tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) - continue; + break; tmp_list->cfile = cfile; list_add_tail(&tmp_list->list, &file_head); } @@ -767,7 +766,7 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) if (cancel_delayed_work(&cfile->deferred)) { tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); if (tmp_list == NULL) - continue; + break; tmp_list->cfile = cfile; list_add_tail(&tmp_list->list, &file_head); } @@ -781,6 +780,43 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) kfree(tmp_list); } } +void +cifs_close_deferred_file_under_dentry(struct cifs_tcon *tcon, const char *path) +{ + struct cifsFileInfo *cfile; + struct list_head *tmp; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + void *page; + const char *full_path; + + INIT_LIST_HEAD(&file_head); + page = alloc_dentry_path(); + spin_lock(&tcon->open_file_lock); + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + full_path = build_path_from_dentry(cfile->dentry, page); + if (strstr(full_path, path)) { + if (delayed_work_pending(&cfile->deferred)) { + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + break; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } + } + } + } + spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } + free_dentry_path(page); +} /* parses DFS refferal V3 structure * caller is responsible for freeing target_nodes diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 0e728aac67e9..fa9fbd6a819c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/netmisc.c * * Copyright (c) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 378133ce8869..25a2b8ef88b9 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/ntlmssp.h * * Copyright (c) International Business Machines Corp., 2002,2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 54d77c99e21c..1929e80c09ee 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/readdir.c * * Directory search handling * diff --git a/fs/cifs/rfc1002pdu.h b/fs/cifs/rfc1002pdu.h index 137f7c95afd6..ae1d025da294 100644 --- a/fs/cifs/rfc1002pdu.h +++ b/fs/cifs/rfc1002pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/rfc1002pdu.h * * Protocol Data Unit definitions for RFC 1001/1002 support * diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 118403fbeda2..23e02db7923f 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/sess.c * * SMB/CIFS session setup handling routines * diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index c9d8a50062b8..f5dcc4940b6d 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2file.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Author(s): Steve French (sfrench@us.ibm.com), diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index d0e9f3782bd9..ca692b2283cd 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2glob.h * * Definitions for various global variables and structures * diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 957b2594f02e..8297703492ee 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2inode.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 668f77108831..29b5554f6263 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2misc.c * * Copyright (C) International Business Machines Corp., 2002,2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ddc0e8f97872..bda606dc72b1 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -689,13 +689,19 @@ smb2_close_cached_fid(struct kref *ref) cifs_dbg(FYI, "clear cached root file handle\n"); SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid, cfid->fid->volatile_fid); - cfid->is_valid = false; - cfid->file_all_info_is_valid = false; - cfid->has_lease = false; - if (cfid->dentry) { - dput(cfid->dentry); - cfid->dentry = NULL; - } + } + + /* + * We only check validity above to send SMB2_close, + * but we still need to invalidate these entries + * when this function is called + */ + cfid->is_valid = false; + cfid->file_all_info_is_valid = false; + cfid->has_lease = false; + if (cfid->dentry) { + dput(cfid->dentry); + cfid->dentry = NULL; } } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b6d2e3591927..672ae78e866a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2pdu.c * * Copyright (C) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e9cac7970b66..f32c99c9ba13 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2pdu.h * * Copyright (c) International Business Machines Corp., 2009, 2013 * Etersoft, 2012 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 263767f644f8..547945443fa7 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2proto.h * * Copyright (c) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smb2status.h b/fs/cifs/smb2status.h index 0215ef36e240..a9e958166fc5 100644 --- a/fs/cifs/smb2status.h +++ b/fs/cifs/smb2status.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smb2status.h * * SMB2 Status code (network error) definitions * Definitions are from MS-ERREF diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 6f7952ea4941..f59b956f9d25 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/smb2transport.c * * Copyright (C) International Business Machines Corp., 2002, 2011 * Etersoft, 2012 diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 10047cc55286..4a0487753869 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -24,7 +24,7 @@ #include "cifsglob.h" #include "cifs_debug.h" #include "cifsproto.h" -#include "../cifs_common/md4.h" +#include "../smbfs_common/md4.h" #ifndef false #define false 0 diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index 60189efb3236..aeffdad829e2 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1 */ /* - * fs/cifs/smberr.h * * Copyright (c) International Business Machines Corp., 2002,2004 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 75a95de320cf..b7379329b741 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/transport.c * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c index 59b6c577aa0a..2f075b5b50df 100644 --- a/fs/cifs/winucase.c +++ b/fs/cifs/winucase.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * fs/cifs/winucase.c * * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013 * diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 9ed481e79ce0..7d8b72d67c80 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: LGPL-2.1 /* - * fs/cifs/xattr.c * * Copyright (c) International Business Machines Corp., 2003, 2007 * Author(s): Steve French (sfrench@us.ibm.com) diff --git a/fs/file.c b/fs/file.c index d8afa8266859..8627dacfc424 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1150,6 +1150,12 @@ int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags) return new_fd; } +int receive_fd(struct file *file, unsigned int o_flags) +{ + return __receive_fd(file, NULL, o_flags); +} +EXPORT_SYMBOL_GPL(receive_fd); + static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 980d44fd3a36..3df07c0e32b3 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -165,7 +165,6 @@ int fs_lookup_param(struct fs_context *fc, return invalf(fc, "%s: not usable as path", param->key); } - f->refcnt++; /* filename_lookup() drops our ref. */ ret = filename_lookup(param->dirfd, f, flags, _path, NULL); if (ret < 0) { errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name); diff --git a/fs/internal.h b/fs/internal.h index 68a2ae029a27..3cd065c8a66b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -18,7 +18,7 @@ struct user_namespace; struct pipe_inode_info; /* - * block_dev.c + * block/bdev.c */ #ifdef CONFIG_BLOCK extern void __init bdev_cache_init(void); diff --git a/fs/io-wq.c b/fs/io-wq.c index d80e4a735677..c2e0e8e80949 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -14,6 +14,7 @@ #include <linux/rculist_nulls.h> #include <linux/cpu.h> #include <linux/tracehook.h> +#include <uapi/linux/io_uring.h> #include "io-wq.h" @@ -176,7 +177,6 @@ static void io_worker_ref_put(struct io_wq *wq) static void io_worker_exit(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; - struct io_wqe_acct *acct = io_wqe_get_acct(worker); if (refcount_dec_and_test(&worker->ref)) complete(&worker->ref_done); @@ -186,7 +186,6 @@ static void io_worker_exit(struct io_worker *worker) if (worker->flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); - acct->nr_workers--; preempt_disable(); io_wqe_dec_running(worker); worker->flags = 0; @@ -246,8 +245,6 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe, */ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) { - bool do_create = false; - /* * Most likely an attempt to queue unbounded work on an io_wq that * wasn't setup with any unbounded workers. @@ -256,18 +253,15 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) pr_warn_once("io-wq is not configured for unbound workers"); raw_spin_lock(&wqe->lock); - if (acct->nr_workers < acct->max_workers) { - acct->nr_workers++; - do_create = true; + if (acct->nr_workers == acct->max_workers) { + raw_spin_unlock(&wqe->lock); + return true; } + acct->nr_workers++; raw_spin_unlock(&wqe->lock); - if (do_create) { - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - return create_io_worker(wqe->wq, wqe, acct->index); - } - - return true; + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + return create_io_worker(wqe->wq, wqe, acct->index); } static void io_wqe_inc_running(struct io_worker *worker) @@ -574,6 +568,7 @@ loop: } /* timed out, exit unless we're the last worker */ if (last_timeout && acct->nr_workers > 1) { + acct->nr_workers--; raw_spin_unlock(&wqe->lock); __set_current_state(TASK_RUNNING); break; @@ -709,6 +704,7 @@ static void create_worker_cont(struct callback_head *cb) } raw_spin_unlock(&wqe->lock); io_worker_ref_put(wqe->wq); + kfree(worker); return; } @@ -725,6 +721,7 @@ static void io_workqueue_create(struct work_struct *work) if (!io_queue_worker_create(worker, acct, create_worker_cont)) { clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); + kfree(worker); } } @@ -759,6 +756,7 @@ fail: if (!IS_ERR(tsk)) { io_init_new_worker(wqe, worker, tsk); } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + kfree(worker); goto fail; } else { INIT_WORK(&worker->work, io_workqueue_create); @@ -832,6 +830,11 @@ append: wq_list_add_after(&work->list, &tail->list, &acct->work_list); } +static bool io_wq_work_match_item(struct io_wq_work *work, void *data) +{ + return work == data; +} + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); @@ -844,7 +847,6 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) */ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || (work->flags & IO_WQ_WORK_CANCEL)) { -run_cancel: io_run_cancel(work, wqe); return; } @@ -864,15 +866,22 @@ run_cancel: bool did_create; did_create = io_wqe_create_worker(wqe, acct); - if (unlikely(!did_create)) { - raw_spin_lock(&wqe->lock); - /* fatal condition, failed to create the first worker */ - if (!acct->nr_workers) { - raw_spin_unlock(&wqe->lock); - goto run_cancel; - } - raw_spin_unlock(&wqe->lock); + if (likely(did_create)) + return; + + raw_spin_lock(&wqe->lock); + /* fatal condition, failed to create the first worker */ + if (!acct->nr_workers) { + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_item, + .data = work, + .cancel_all = false, + }; + + if (io_acct_cancel_pending_work(wqe, acct, &match)) + raw_spin_lock(&wqe->lock); } + raw_spin_unlock(&wqe->lock); } } @@ -1122,7 +1131,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data) { struct io_worker *worker; - if (cb->func != create_worker_cb || cb->func != create_worker_cont) + if (cb->func != create_worker_cb && cb->func != create_worker_cont) return false; worker = container_of(cb, struct io_worker, create_work); return worker->wqe->wq == data; @@ -1143,9 +1152,14 @@ static void io_wq_exit_workers(struct io_wq *wq) while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { struct io_worker *worker; + struct io_wqe_acct *acct; worker = container_of(cb, struct io_worker, create_work); - atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); + acct = io_wqe_get_acct(worker); + atomic_dec(&acct->nr_running); + raw_spin_lock(&worker->wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&worker->wqe->lock); io_worker_ref_put(wq); clear_bit_unlock(0, &worker->create_state); io_worker_release(worker); @@ -1268,6 +1282,10 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) { int i, node, prev = 0; + BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); + for (i = 0; i < 2; i++) { if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) new_count[i] = task_rlimit(current, RLIMIT_NPROC); diff --git a/fs/io_uring.c b/fs/io_uring.c index 855ea544807f..e372d5b9f6dc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -712,6 +712,7 @@ struct io_async_rw { struct iovec fast_iov[UIO_FASTIOV]; const struct iovec *free_iovec; struct iov_iter iter; + struct iov_iter_state iter_state; size_t bytes_done; struct wait_page_queue wpq; }; @@ -735,7 +736,6 @@ enum { REQ_F_BUFFER_SELECTED_BIT, REQ_F_COMPLETE_INLINE_BIT, REQ_F_REISSUE_BIT, - REQ_F_DONT_REISSUE_BIT, REQ_F_CREDS_BIT, REQ_F_REFCOUNT_BIT, REQ_F_ARM_LTIMEOUT_BIT, @@ -782,8 +782,6 @@ enum { REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), /* caller should reissue async */ REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), - /* don't attempt request reissue, see io_rw_reissue() */ - REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT), /* supports async reads */ REQ_F_NOWAIT_READ = BIT(REQ_F_NOWAIT_READ_BIT), /* supports async writes */ @@ -1482,6 +1480,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) struct io_timeout_data *io = req->async_data; if (hrtimer_try_to_cancel(&io->timer) != -1) { + if (status) + req_set_fail(req); atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); @@ -1619,8 +1619,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { - if (wq_has_sleeper(&ctx->cq_wait)) + if (waitqueue_active(&ctx->cq_wait)) wake_up_all(&ctx->cq_wait); } if (io_should_trigger_evfd(ctx)) @@ -2439,13 +2442,6 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && - !(req->flags & REQ_F_DONT_REISSUE)) { - req->iopoll_completed = 0; - io_req_task_queue_reissue(req); - continue; - } - __io_cqring_fill_event(ctx, req->user_data, req->result, io_put_rw_kbuf(req)); (*nr_events)++; @@ -2608,8 +2604,7 @@ static bool io_resubmit_prep(struct io_kiocb *req) if (!rw) return !io_req_prep_async(req); - /* may have left rw->iter inconsistent on -EIOCBQUEUED */ - iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter)); + iov_iter_restore(&rw->iter, &rw->iter_state); return true; } @@ -2709,10 +2704,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); if (unlikely(res != req->result)) { - if (!(res == -EAGAIN && io_rw_should_reissue(req) && - io_resubmit_prep(req))) { - req_set_fail(req); - req->flags |= REQ_F_DONT_REISSUE; + if (res == -EAGAIN && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE; + return; } } @@ -2838,7 +2832,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw) return __io_file_supports_nowait(req->file, rw); } -static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, + int rw) { struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; @@ -2860,8 +2855,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(ret)) return ret; - /* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK)) + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) req->flags |= REQ_F_NOWAIT; ioprio = READ_ONCE(sqe->ioprio); @@ -2926,7 +2926,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); struct io_async_rw *io = req->async_data; - bool check_reissue = kiocb->ki_complete == io_complete_rw; /* add previously done IO, if any */ if (io && io->bytes_done > 0) { @@ -2938,19 +2937,27 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, if (req->flags & REQ_F_CUR_POS) req->file->f_pos = kiocb->ki_pos; - if (ret >= 0 && check_reissue) + if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) __io_complete_rw(req, ret, 0, issue_flags); else io_rw_done(kiocb, ret); - if (check_reissue && (req->flags & REQ_F_REISSUE)) { + if (req->flags & REQ_F_REISSUE) { req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { io_req_task_queue_reissue(req); } else { + unsigned int cflags = io_put_rw_kbuf(req); + struct io_ring_ctx *ctx = req->ctx; + req_set_fail(req); - __io_req_complete(req, issue_flags, ret, - io_put_rw_kbuf(req)); + if (issue_flags & IO_URING_F_NONBLOCK) { + mutex_lock(&ctx->uring_lock); + __io_req_complete(req, issue_flags, ret, cflags); + mutex_unlock(&ctx->uring_lock); + } else { + __io_req_complete(req, issue_flags, ret, cflags); + } } } } @@ -3258,12 +3265,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) ret = nr; break; } + if (!iov_iter_is_bvec(iter)) { + iov_iter_advance(iter, nr); + } else { + req->rw.len -= nr; + req->rw.addr += nr; + } ret += nr; if (nr != iovec.iov_len) break; - req->rw.len -= nr; - req->rw.addr += nr; - iov_iter_advance(iter, nr); } return ret; @@ -3310,12 +3320,17 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, if (!force && !io_op_defs[req->opcode].needs_async_setup) return 0; if (!req->async_data) { + struct io_async_rw *iorw; + if (io_alloc_async_data(req)) { kfree(iovec); return -ENOMEM; } io_req_map_rw(req, iovec, fast_iov, iter); + iorw = req->async_data; + /* we've copied and mapped the iter, ensure state is saved */ + iov_iter_save_state(&iorw->iter, &iorw->iter_state); } return 0; } @@ -3334,6 +3349,7 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) iorw->free_iovec = iov; if (iov) req->flags |= REQ_F_NEED_CLEANUP; + iov_iter_save_state(&iorw->iter, &iorw->iter_state); return 0; } @@ -3341,7 +3357,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_READ))) return -EBADF; - return io_prep_rw(req, sqe); + return io_prep_rw(req, sqe, READ); } /* @@ -3437,19 +3453,28 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - ssize_t io_size, ret, ret2; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; if (rw) { iter = &rw->iter; + state = &rw->iter_state; + /* + * We come here from an earlier attempt, restore our state to + * match in case it doesn't. It's cheap enough that we don't + * need to make this conditional. + */ + iov_iter_restore(iter, state); iovec = NULL; } else { ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + state = &__state; + iov_iter_save_state(iter, state); } - io_size = iov_iter_count(iter); - req->result = io_size; + req->result = iov_iter_count(iter); /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3463,7 +3488,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size); + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) { kfree(iovec); return ret; @@ -3479,30 +3504,49 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) /* no retry on NONBLOCK nor RWF_NOWAIT */ if (req->flags & REQ_F_NOWAIT) goto done; - /* some cases will consume bytes even on error returns */ - iov_iter_reexpand(iter, iter->count + iter->truncated); - iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; } else if (ret == -EIOCBQUEUED) { goto out_free; - } else if (ret <= 0 || ret == io_size || !force_nonblock || + } else if (ret <= 0 || ret == req->result || !force_nonblock || (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } + /* + * Don't depend on the iter state matching what was consumed, or being + * untouched in case of error. Restore it and we'll advance it + * manually if we need to. + */ + iov_iter_restore(iter, state); + ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); if (ret2) return ret2; iovec = NULL; rw = req->async_data; - /* now use our persistent iterator, if we aren't already */ - iter = &rw->iter; + /* + * Now use our persistent iterator and state, if we aren't already. + * We've restored and mapped the iter to match. + */ + if (iter != &rw->iter) { + iter = &rw->iter; + state = &rw->iter_state; + } do { - io_size -= ret; + /* + * We end up here because of a partial read, either from + * above or inside this loop. Advance the iter by the bytes + * that were consumed. + */ + iov_iter_advance(iter, ret); + if (!iov_iter_count(iter)) + break; rw->bytes_done += ret; + iov_iter_save_state(iter, state); + /* if we can retry, do so with the callbacks armed */ if (!io_rw_should_retry(req)) { kiocb->ki_flags &= ~IOCB_WAITQ; @@ -3520,7 +3564,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return 0; /* we got some bytes, but not all. retry. */ kiocb->ki_flags &= ~IOCB_WAITQ; - } while (ret > 0 && ret < io_size); + iov_iter_restore(iter, state); + } while (ret > 0); done: kiocb_done(kiocb, ret, issue_flags); out_free: @@ -3534,7 +3579,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_WRITE))) return -EBADF; - return io_prep_rw(req, sqe); + return io_prep_rw(req, sqe, WRITE); } static int io_write(struct io_kiocb *req, unsigned int issue_flags) @@ -3543,19 +3588,24 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - ssize_t ret, ret2, io_size; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; if (rw) { iter = &rw->iter; + state = &rw->iter_state; + iov_iter_restore(iter, state); iovec = NULL; } else { ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); if (ret < 0) return ret; + state = &__state; + iov_iter_save_state(iter, state); } - io_size = iov_iter_count(iter); - req->result = io_size; + req->result = iov_iter_count(iter); + ret2 = 0; /* Ensure we clear previously set non-block flag */ if (!force_nonblock) @@ -3572,7 +3622,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) goto out_free; @@ -3619,9 +3669,9 @@ done: kiocb_done(kiocb, ret2, issue_flags); } else { copy_iov: - /* some cases will consume bytes even on error returns */ - iov_iter_reexpand(iter, iter->count + iter->truncated); - iov_iter_revert(iter, io_size - iov_iter_count(iter)); + iov_iter_restore(iter, state); + if (ret2 > 0) + iov_iter_advance(iter, ret2); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); return ret ?: -EAGAIN; } @@ -7510,6 +7560,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, break; } while (1); + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = timespec64_to_jiffies(&ts); + } + if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) @@ -7523,14 +7581,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } - if (uts) { - struct timespec64 ts; - - if (get_timespec64(&ts, uts)) - return -EFAULT; - timeout = timespec64_to_jiffies(&ts); - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); @@ -8279,11 +8329,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file, #endif } +static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, + struct io_rsrc_node *node, void *rsrc) +{ + struct io_rsrc_put *prsrc; + + prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); + if (!prsrc) + return -ENOMEM; + + prsrc->tag = *io_get_tag_slot(data, idx); + prsrc->rsrc = rsrc; + list_add(&prsrc->list, &node->rsrc_list); + return 0; +} + static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index) { struct io_ring_ctx *ctx = req->ctx; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + bool needs_switch = false; struct io_fixed_file *file_slot; int ret = -EBADF; @@ -8299,9 +8365,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, slot_index = array_index_nospec(slot_index, ctx->nr_user_files); file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); - ret = -EBADF; - if (file_slot->file_ptr) - goto err; + + if (file_slot->file_ptr) { + struct file *old_file; + + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + + old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, slot_index, + ctx->rsrc_node, old_file); + if (ret) + goto err; + file_slot->file_ptr = 0; + needs_switch = true; + } *io_get_tag_slot(ctx->file_data, slot_index) = 0; io_fixed_file_set(file_slot, file); @@ -8313,27 +8392,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, ret = 0; err: + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->file_data); io_ring_submit_unlock(ctx, !force_nonblock); if (ret) fput(file); return ret; } -static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, - struct io_rsrc_node *node, void *rsrc) -{ - struct io_rsrc_put *prsrc; - - prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); - if (!prsrc) - return -ENOMEM; - - prsrc->tag = *io_get_tag_slot(data, idx); - prsrc->rsrc = rsrc; - list_add(&prsrc->list, &node->rsrc_list); - return 0; -} - static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_rsrc_update2 *up, unsigned nr_args) @@ -10550,8 +10616,17 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold + * a ref to the ctx. + */ + refcount_inc(&sqd->refs); + mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); - tctx = sqd->thread->io_uring; + mutex_lock(&ctx->uring_lock); + if (sqd->thread) + tctx = sqd->thread->io_uring; } } else { tctx = current->io_uring; @@ -10565,16 +10640,20 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ret) goto err; - if (sqd) + if (sqd) { mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; return 0; err: - if (sqd) + if (sqd) { mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } return ret; } @@ -10853,7 +10932,7 @@ static int __init io_uring_init(void) BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); - BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int)); + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); diff --git a/fs/namei.c b/fs/namei.c index 95a881e0552b..1946d9667790 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -255,7 +255,7 @@ getname_kernel(const char * filename) void putname(struct filename *name) { - if (IS_ERR_OR_NULL(name)) + if (IS_ERR(name)) return; BUG_ON(name->refcnt <= 0); @@ -2467,7 +2467,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path return err; } -static int __filename_lookup(int dfd, struct filename *name, unsigned flags, +int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root) { int retval; @@ -2488,15 +2488,6 @@ static int __filename_lookup(int dfd, struct filename *name, unsigned flags, return retval; } -int filename_lookup(int dfd, struct filename *name, unsigned flags, - struct path *path, struct path *root) -{ - int retval = __filename_lookup(dfd, name, flags, path, root); - - putname(name); - return retval; -} - /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ static int path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) @@ -2514,9 +2505,10 @@ static int path_parentat(struct nameidata *nd, unsigned flags, return err; } -static int __filename_parentat(int dfd, struct filename *name, - unsigned int flags, struct path *parent, - struct qstr *last, int *type) +/* Note: this does not consume "name" */ +static int filename_parentat(int dfd, struct filename *name, + unsigned int flags, struct path *parent, + struct qstr *last, int *type) { int retval; struct nameidata nd; @@ -2538,25 +2530,14 @@ static int __filename_parentat(int dfd, struct filename *name, return retval; } -static int filename_parentat(int dfd, struct filename *name, - unsigned int flags, struct path *parent, - struct qstr *last, int *type) -{ - int retval = __filename_parentat(dfd, name, flags, parent, last, type); - - putname(name); - return retval; -} - /* does lookup, returns the object with parent locked */ -struct dentry *kern_path_locked(const char *name, struct path *path) +static struct dentry *__kern_path_locked(struct filename *name, struct path *path) { struct dentry *d; struct qstr last; int type, error; - error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path, - &last, &type); + error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type); if (error) return ERR_PTR(error); if (unlikely(type != LAST_NORM)) { @@ -2572,10 +2553,23 @@ struct dentry *kern_path_locked(const char *name, struct path *path) return d; } +struct dentry *kern_path_locked(const char *name, struct path *path) +{ + struct filename *filename = getname_kernel(name); + struct dentry *res = __kern_path_locked(filename, path); + + putname(filename); + return res; +} + int kern_path(const char *name, unsigned int flags, struct path *path) { - return filename_lookup(AT_FDCWD, getname_kernel(name), - flags, path, NULL); + struct filename *filename = getname_kernel(name); + int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL); + + putname(filename); + return ret; + } EXPORT_SYMBOL(kern_path); @@ -2591,10 +2585,15 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { + struct filename *filename; struct path root = {.mnt = mnt, .dentry = dentry}; + int ret; + + filename = getname_kernel(name); /* the first argument of filename_lookup() is ignored with root */ - return filename_lookup(AT_FDCWD, getname_kernel(name), - flags , path, &root); + ret = filename_lookup(AT_FDCWD, filename, flags, path, &root); + putname(filename); + return ret; } EXPORT_SYMBOL(vfs_path_lookup); @@ -2798,8 +2797,11 @@ int path_pts(struct path *path) int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { - return filename_lookup(dfd, getname_flags(name, flags, empty), - flags, path, NULL); + struct filename *filename = getname_flags(name, flags, empty); + int ret = filename_lookup(dfd, filename, flags, path, NULL); + + putname(filename); + return ret; } EXPORT_SYMBOL(user_path_at_empty); @@ -3618,8 +3620,8 @@ struct file *do_file_open_root(const struct path *root, return file; } -static struct dentry *__filename_create(int dfd, struct filename *name, - struct path *path, unsigned int lookup_flags) +static struct dentry *filename_create(int dfd, struct filename *name, + struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct qstr last; @@ -3634,7 +3636,7 @@ static struct dentry *__filename_create(int dfd, struct filename *name, */ lookup_flags &= LOOKUP_REVAL; - error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type); + error = filename_parentat(dfd, name, lookup_flags, path, &last, &type); if (error) return ERR_PTR(error); @@ -3687,21 +3689,15 @@ out: return dentry; } -static inline struct dentry *filename_create(int dfd, struct filename *name, +struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { - struct dentry *res = __filename_create(dfd, name, path, lookup_flags); + struct filename *filename = getname_kernel(pathname); + struct dentry *res = filename_create(dfd, filename, path, lookup_flags); - putname(name); + putname(filename); return res; } - -struct dentry *kern_path_create(int dfd, const char *pathname, - struct path *path, unsigned int lookup_flags) -{ - return filename_create(dfd, getname_kernel(pathname), - path, lookup_flags); -} EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) @@ -3716,7 +3712,11 @@ EXPORT_SYMBOL(done_path_create); inline struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { - return filename_create(dfd, getname(pathname), path, lookup_flags); + struct filename *filename = getname(pathname); + struct dentry *res = filename_create(dfd, filename, path, lookup_flags); + + putname(filename); + return res; } EXPORT_SYMBOL(user_path_create); @@ -3797,7 +3797,7 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, if (error) goto out1; retry: - dentry = __filename_create(dfd, name, &path, lookup_flags); + dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out1; @@ -3897,7 +3897,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) unsigned int lookup_flags = LOOKUP_DIRECTORY; retry: - dentry = __filename_create(dfd, name, &path, lookup_flags); + dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; @@ -3996,7 +3996,7 @@ int do_rmdir(int dfd, struct filename *name) int type; unsigned int lookup_flags = 0; retry: - error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type); + error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; @@ -4137,7 +4137,7 @@ int do_unlinkat(int dfd, struct filename *name) struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: - error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type); + error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; @@ -4266,7 +4266,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to) goto out_putnames; } retry: - dentry = __filename_create(newdfd, to, &path, lookup_flags); + dentry = filename_create(newdfd, to, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putnames; @@ -4426,11 +4426,11 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; retry: - error = __filename_lookup(olddfd, old, how, &old_path, NULL); + error = filename_lookup(olddfd, old, how, &old_path, NULL); if (error) goto out_putnames; - new_dentry = __filename_create(newdfd, new, &new_path, + new_dentry = filename_create(newdfd, new, &new_path, (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) @@ -4689,13 +4689,13 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, target_flags = 0; retry: - error = __filename_parentat(olddfd, from, lookup_flags, &old_path, - &old_last, &old_type); + error = filename_parentat(olddfd, from, lookup_flags, &old_path, + &old_last, &old_type); if (error) goto put_names; - error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, - &new_type); + error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, + &new_type); if (error) goto exit1; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 95006d1d29ab..fa1d99101f89 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, /* Someone else created list structure for us */ if (inode) fsnotify_put_inode_ref(inode); + fsnotify_put_sb_connectors(conn); kmem_cache_free(fsnotify_mark_connector_cachep, conn); } diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index a6ee23aadd28..2a66844b7ff8 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -15,13 +15,27 @@ #include <linux/buffer_head.h> #include "qnx4.h" +/* + * A qnx4 directory entry is an inode entry or link info + * depending on the status field in the last byte. The + * first byte is where the name start either way, and a + * zero means it's empty. + */ +union qnx4_directory_entry { + struct { + char de_name; + char de_pad[62]; + char de_status; + }; + struct qnx4_inode_entry inode; + struct qnx4_link_info link; +}; + static int qnx4_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned int offset; struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct qnx4_link_info *le; unsigned long blknum; int ix, ino; int size; @@ -38,27 +52,30 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx) } ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { + union qnx4_directory_entry *de; + const char *name; + offset = ix * QNX4_DIR_ENTRY_SIZE; - de = (struct qnx4_inode_entry *) (bh->b_data + offset); - if (!de->di_fname[0]) + de = (union qnx4_directory_entry *) (bh->b_data + offset); + + if (!de->de_name) continue; - if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) + if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) continue; - if (!(de->di_status & QNX4_FILE_LINK)) - size = QNX4_SHORT_NAME_MAX; - else - size = QNX4_NAME_MAX; - size = strnlen(de->di_fname, size); - QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); - if (!(de->di_status & QNX4_FILE_LINK)) + if (!(de->de_status & QNX4_FILE_LINK)) { + size = sizeof(de->inode.di_fname); + name = de->inode.di_fname; ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; - else { - le = (struct qnx4_link_info*)de; - ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * + } else { + size = sizeof(de->link.dl_fname); + name = de->link.dl_fname; + ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + - le->dl_inode_ndx; + de->link.dl_inode_ndx; } - if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { + size = strnlen(name, size); + QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name)); + if (!dir_emit(ctx, name, size, ino, DT_UNKNOWN)) { brelse(bh); return 0; } diff --git a/fs/cifs_common/Makefile b/fs/smbfs_common/Makefile index 6fedd2f88a25..cafc61a3bfc3 100644 --- a/fs/cifs_common/Makefile +++ b/fs/smbfs_common/Makefile @@ -3,5 +3,5 @@ # Makefile for Linux filesystem routines that are shared by client and server. # -obj-$(CONFIG_CIFS_COMMON) += cifs_arc4.o -obj-$(CONFIG_CIFS_COMMON) += cifs_md4.o +obj-$(CONFIG_SMBFS_COMMON) += cifs_arc4.o +obj-$(CONFIG_SMBFS_COMMON) += cifs_md4.o diff --git a/fs/cifs_common/arc4.h b/fs/smbfs_common/arc4.h index 12e71ec033a1..12e71ec033a1 100644 --- a/fs/cifs_common/arc4.h +++ b/fs/smbfs_common/arc4.h diff --git a/fs/cifs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c index b964cc682944..85ba15a60b13 100644 --- a/fs/cifs_common/cifs_arc4.c +++ b/fs/smbfs_common/cifs_arc4.c @@ -74,14 +74,14 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l EXPORT_SYMBOL_GPL(cifs_arc4_crypt); static int __init -init_cifs_common(void) +init_smbfs_common(void) { return 0; } static void __init -exit_cifs_common(void) +exit_smbfs_common(void) { } -module_init(init_cifs_common) -module_exit(exit_cifs_common) +module_init(init_smbfs_common) +module_exit(exit_smbfs_common) diff --git a/fs/cifs_common/cifs_md4.c b/fs/smbfs_common/cifs_md4.c index 50f78cfc6ce9..50f78cfc6ce9 100644 --- a/fs/cifs_common/cifs_md4.c +++ b/fs/smbfs_common/cifs_md4.c diff --git a/fs/cifs_common/md4.h b/fs/smbfs_common/md4.h index 5337becc699a..5337becc699a 100644 --- a/fs/cifs_common/md4.h +++ b/fs/smbfs_common/md4.h diff --git a/fs/cifs/smbfsctl.h b/fs/smbfs_common/smbfsctl.h index d0fc42061f49..926f87cd6af0 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/smbfs_common/smbfsctl.h @@ -1,6 +1,6 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ +/* SPDX-License-Identifier: LGPL-2.1+ */ /* - * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions + * SMB, CIFS, SMB2 FSCTL definitions * * Copyright (c) International Business Machines Corp., 2002,2013 * Author(s): Steve French (sfrench@us.ibm.com) @@ -19,11 +19,14 @@ * could be invoked from tools via a specialized hook into the VFS rather * than via the standard vfs entry points * - * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are + * See MS-SMB2 Section 2.2.31 (last checked September 2021, all of that list are * below). Additional detail on less common ones can be found in MS-FSCC * section 2.3. */ +#ifndef __SMBFSCTL_H +#define __SMBFSCTL_H + /* * FSCTL values are 32 bits and are constructed as * <device 16bits> <access 2bits> <function 12bits> <method 2bits> @@ -91,6 +94,7 @@ #define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */ #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ #define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C +#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */ #define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3 #define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF @@ -146,7 +150,13 @@ #define IO_REPARSE_TAG_LX_CHR 0x80000025 #define IO_REPARSE_TAG_LX_BLK 0x80000026 +#define IO_REPARSE_TAG_LX_SYMLINK_LE cpu_to_le32(0xA000001D) +#define IO_REPARSE_TAG_AF_UNIX_LE cpu_to_le32(0x80000023) +#define IO_REPARSE_TAG_LX_FIFO_LE cpu_to_le32(0x80000024) +#define IO_REPARSE_TAG_LX_CHR_LE cpu_to_le32(0x80000025) +#define IO_REPARSE_TAG_LX_BLK_LE cpu_to_le32(0x80000026) + /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 - +#endif /* __SMBFSCTL_H */ diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 9f4985b4d64d..bc159a9b4a73 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -135,6 +135,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); +extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf) { return -ENOTSUPP; } +static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index e93375c710b9..cc7338f9e0d1 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -1023,16 +1023,7 @@ static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) port &= IO_SPACE_LIMIT; return (port > MMIO_UPPER_LIMIT) ? NULL : PCI_IOBASE + port; } -#define __pci_ioport_unmap __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) -{ - uintptr_t start = (uintptr_t) PCI_IOBASE; - uintptr_t addr = (uintptr_t) p; - - if (addr >= start && addr < start + IO_SPACE_LIMIT) - return; - iounmap(p); -} +#define ARCH_HAS_GENERIC_IOPORT_MAP #endif #ifndef ioport_unmap @@ -1048,21 +1039,10 @@ extern void ioport_unmap(void __iomem *p); #endif /* CONFIG_HAS_IOPORT_MAP */ #ifndef CONFIG_GENERIC_IOMAP -struct pci_dev; -extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); - -#ifndef __pci_ioport_unmap -static inline void __pci_ioport_unmap(void __iomem *p) {} -#endif - #ifndef pci_iounmap -#define pci_iounmap pci_iounmap -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) -{ - __pci_ioport_unmap(p); -} +#define ARCH_WANTS_GENERIC_PCI_IOUNMAP +#endif #endif -#endif /* CONFIG_GENERIC_IOMAP */ #ifndef xlate_dev_mem_ptr #define xlate_dev_mem_ptr xlate_dev_mem_ptr diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 9b3eb6d86200..08237ae8b840 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -110,16 +110,6 @@ static inline void __iomem *ioremap_np(phys_addr_t offset, size_t size) } #endif -#ifdef CONFIG_PCI -/* Destroy a virtual mapping cookie for a PCI BAR (memory or IO) */ -struct pci_dev; -extern void pci_iounmap(struct pci_dev *dev, void __iomem *); -#elif defined(CONFIG_GENERIC_IOMAP) -struct pci_dev; -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ } -#endif - #include <asm-generic/pci_iomap.h> #endif diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c1ab6a6e72b5..d3eae6cdbacb 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -197,10 +197,12 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) +static inline int __cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus, + bool exclude_self) { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + int this_cpu = smp_processor_id(); /* valid_bank_mask can represent up to 64 banks */ if (hv_max_vp_index / 64 >= 64) @@ -218,6 +220,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, * Some banks may end up being empty but this is acceptable. */ for_each_cpu(cpu, cpus) { + if (exclude_self && cpu == this_cpu) + continue; vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) return -1; @@ -232,6 +236,19 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, return nr_bank; } +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + return __cpumask_to_vpset(vpset, cpus, false); +} + +static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + WARN_ON_ONCE(preemptible()); + return __cpumask_to_vpset(vpset, cpus, true); +} + void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); bool hv_is_hyperv_initialized(void); bool hv_is_hibernation_supported(void); diff --git a/include/asm-generic/pci_iomap.h b/include/asm-generic/pci_iomap.h index df636c6d8e6c..5a2f9bf53384 100644 --- a/include/asm-generic/pci_iomap.h +++ b/include/asm-generic/pci_iomap.h @@ -18,6 +18,7 @@ extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, extern void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, unsigned long offset, unsigned long maxlen); +extern void pci_iounmap(struct pci_dev *dev, void __iomem *); /* Create a virtual mapping cookie for a port on a given PCI device. * Do not call this directly, it exists to make it easier for architectures * to override */ @@ -50,6 +51,8 @@ static inline void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, { return NULL; } +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ } #endif #endif /* __ASM_GENERIC_PCI_IOMAP_H */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa50bf2959fe..f2984af2b85b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -116,11 +116,7 @@ * GCC 4.5 and later have a 32 bytes section alignment for structures. * Except GCC 4.9, that feels the need to align on 64 bytes. */ -#if __GNUC__ == 4 && __GNUC_MINOR__ == 9 -#define STRUCT_ALIGNMENT 64 -#else #define STRUCT_ALIGNMENT 32 -#endif #define STRUCT_ALIGN() . = ALIGN(STRUCT_ALIGNMENT) /* diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 818680c6a8ed..b20e89d321b0 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -27,11 +27,12 @@ #ifndef _TTM_TT_H_ #define _TTM_TT_H_ +#include <linux/pagemap.h> #include <linux/types.h> #include <drm/ttm/ttm_caching.h> #include <drm/ttm/ttm_kmap_iter.h> -struct ttm_bo_device; +struct ttm_device; struct ttm_tt; struct ttm_resource; struct ttm_buffer_object; diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index abe089c27529..537e1b991f11 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -110,7 +110,7 @@ static inline __init bool xbc_node_is_leaf(struct xbc_node *node) } /* Tree-based key-value access APIs */ -struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent, +struct xbc_node * __init xbc_node_find_subkey(struct xbc_node *parent, const char *key); const char * __init xbc_node_find_value(struct xbc_node *parent, @@ -148,7 +148,7 @@ xbc_find_value(const char *key, struct xbc_node **vnode) */ static inline struct xbc_node * __init xbc_find_node(const char *key) { - return xbc_node_find_child(NULL, key); + return xbc_node_find_subkey(NULL, key); } /** diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 4f72b47973c3..2f909ed084c6 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -79,24 +79,6 @@ struct cpu_cacheinfo { bool cpu_map_populated; }; -/* - * Helpers to make sure "func" is executed on the cpu whose cache - * attributes are being detected - */ -#define DEFINE_SMP_CALL_CACHE_FUNCTION(func) \ -static inline void _##func(void *ret) \ -{ \ - int cpu = smp_processor_id(); \ - *(int *)ret = __##func(cpu); \ -} \ - \ -int func(unsigned int cpu) \ -{ \ - int ret; \ - smp_call_function_single(cpu, _##func, &ret, true); \ - return ret; \ -} - struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e1c705fdfa7c..db2e147e069f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains * per-socket cgroup information except for memcg association. * - * On legacy hierarchies, net_prio and net_cls controllers directly set - * attributes on each sock which can then be tested by the network layer. - * On the default hierarchy, each sock is associated with the cgroup it was - * created in and the networking layer can match the cgroup directly. - * - * To avoid carrying all three cgroup related fields separately in sock, - * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. - * On boot, sock_cgroup_data records the cgroup that the sock was created - * in so that cgroup2 matches can be made; however, once either net_prio or - * net_cls starts being used, the area is overridden to carry prioidx and/or - * classid. The two modes are distinguished by whether the lowest bit is - * set. Clear bit indicates cgroup pointer while set bit prioidx and - * classid. - * - * While userland may start using net_prio or net_cls at any time, once - * either is used, cgroup2 matching no longer works. There is no reason to - * mix the two and this is in line with how legacy and v2 compatibility is - * handled. On mode switch, cgroup references which are already being - * pointed to by socks may be leaked. While this can be remedied by adding - * synchronization around sock_cgroup_data, given that the number of leaked - * cgroups is bound and highly unlikely to be high, this seems to be the - * better trade-off. + * On legacy hierarchies, net_prio and net_cls controllers directly + * set attributes on each sock which can then be tested by the network + * layer. On the default hierarchy, each sock is associated with the + * cgroup it was created in and the networking layer can match the + * cgroup directly. */ struct sock_cgroup_data { - union { -#ifdef __LITTLE_ENDIAN - struct { - u8 is_data : 1; - u8 no_refcnt : 1; - u8 unused : 6; - u8 padding; - u16 prioidx; - u32 classid; - } __packed; -#else - struct { - u32 classid; - u16 prioidx; - u8 padding; - u8 unused : 6; - u8 no_refcnt : 1; - u8 is_data : 1; - } __packed; + struct cgroup *cgroup; /* v2 */ +#ifdef CONFIG_CGROUP_NET_CLASSID + u32 classid; /* v1 */ +#endif +#ifdef CONFIG_CGROUP_NET_PRIO + u16 prioidx; /* v1 */ #endif - u64 val; - }; }; -/* - * There's a theoretical window where the following accessors race with - * updaters and return part of the previous pointer as the prioidx or - * classid. Such races are short-lived and the result isn't critical. - */ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd) { - /* fallback to 1 which is always the ID of the root cgroup */ - return (skcd->is_data & 1) ? skcd->prioidx : 1; +#ifdef CONFIG_CGROUP_NET_PRIO + return READ_ONCE(skcd->prioidx); +#else + return 1; +#endif } static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd) { - /* fallback to 0 which is the unconfigured default classid */ - return (skcd->is_data & 1) ? skcd->classid : 0; +#ifdef CONFIG_CGROUP_NET_CLASSID + return READ_ONCE(skcd->classid); +#else + return 0; +#endif } -/* - * If invoked concurrently, the updaters may clobber each other. The - * caller is responsible for synchronization. - */ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, u16 prioidx) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_prioidx(&skcd_buf) == prioidx) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.prioidx = prioidx; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_PRIO + WRITE_ONCE(skcd->prioidx, prioidx); +#endif } static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, u32 classid) { - struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; - - if (sock_cgroup_classid(&skcd_buf) == classid) - return; - - if (!(skcd_buf.is_data & 1)) { - skcd_buf.val = 0; - skcd_buf.is_data = 1; - } - - skcd_buf.classid = classid; - WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +#ifdef CONFIG_CGROUP_NET_CLASSID + WRITE_ONCE(skcd->classid, classid); +#endif } #else /* CONFIG_SOCK_CGROUP_DATA */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7bf60454a313..75c151413fda 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task, */ #ifdef CONFIG_SOCK_CGROUP_DATA -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) -extern spinlock_t cgroup_sk_update_lock; -#endif - -void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) { -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) - unsigned long v; - - /* - * @skcd->val is 64bit but the following is safe on 32bit too as we - * just need the lower ulong to be written and read atomically. - */ - v = READ_ONCE(skcd->val); - - if (v & 3) - return &cgrp_dfl_root.cgrp; - - return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; -#else - return (struct cgroup *)(unsigned long)skcd->val; -#endif + return skcd->cgroup; } #else /* CONFIG_CGROUP_DATA */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 49b0ac8b6fd3..3c4de9b6c6e3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -62,19 +62,6 @@ #define __no_sanitize_coverage #endif -/* - * Not all versions of clang implement the type-generic versions - * of the builtin overflow checkers. Fortunately, clang implements - * __has_builtin allowing us to avoid awkward version - * checks. Unfortunately, we don't know which version of gcc clang - * pretends to be, so the macro may or may not be defined. - */ -#if __has_builtin(__builtin_mul_overflow) && \ - __has_builtin(__builtin_add_overflow) && \ - __has_builtin(__builtin_sub_overflow) -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index cb9217fc60af..bd2b881c6b63 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -43,9 +43,6 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#define __compiletime_warning(message) __attribute__((__warning__(message))) -#define __compiletime_error(message) __attribute__((__error__(message))) - #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) #define __latent_entropy __attribute__((latent_entropy)) #endif @@ -98,10 +95,8 @@ #if GCC_VERSION >= 70000 #define KASAN_ABI_VERSION 5 -#elif GCC_VERSION >= 50000 +#else #define KASAN_ABI_VERSION 4 -#elif GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 #endif #if __has_attribute(__no_sanitize_address__) @@ -128,10 +123,6 @@ #define __no_sanitize_coverage #endif -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif - /* * Turn individual warnings and errors on and off locally, depending * on version. diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b67261a1e3e9..3d5af56337bd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, (typeof(ptr)) (__ptr + (off)); }) #endif +#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) + #ifndef OPTIMIZER_HIDE_VAR /* Make the optimizer believe the variable can be manipulated arbitrarily. */ #define OPTIMIZER_HIDE_VAR(var) \ diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 2487be0e7199..e6ec63403965 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -21,26 +21,6 @@ */ /* - * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support gcc < 5, we implement __has_attribute - * by hand. - */ -#ifndef __has_attribute -# define __has_attribute(x) __GCC4_has_attribute_##x -# define __GCC4_has_attribute___assume_aligned__ 1 -# define __GCC4_has_attribute___copy__ 0 -# define __GCC4_has_attribute___designated_init__ 0 -# define __GCC4_has_attribute___externally_visible__ 1 -# define __GCC4_has_attribute___no_caller_saved_registers__ 0 -# define __GCC4_has_attribute___noclone__ 1 -# define __GCC4_has_attribute___no_profile_instrument_function__ 0 -# define __GCC4_has_attribute___nonstring__ 0 -# define __GCC4_has_attribute___no_sanitize_address__ 1 -# define __GCC4_has_attribute___no_sanitize_undefined__ 1 -# define __GCC4_has_attribute___fallthrough__ 0 -#endif - -/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute */ #define __alias(symbol) __attribute__((__alias__(#symbol))) @@ -74,7 +54,6 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: only supported since gcc >= 4.9 * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute @@ -138,6 +117,17 @@ #endif /* + * Optional: only supported since clang >= 14.0 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute + */ +#if __has_attribute(__error__) +# define __compiletime_error(msg) __attribute__((__error__(msg))) +#else +# define __compiletime_error(msg) +#endif + +/* * Optional: not supported by clang * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute @@ -299,6 +289,17 @@ #define __must_check __attribute__((__warn_unused_result__)) /* + * Optional: only supported since clang >= 14.0 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warning-function-attribute + */ +#if __has_attribute(__warning__) +# define __compiletime_warning(msg) __attribute__((__warning__(msg))) +#else +# define __compiletime_warning(msg) +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e4ea86fc584d..b6ff83a714ca 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -294,12 +294,6 @@ struct ftrace_likely_data { #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 #endif -#ifndef __compiletime_warning -# define __compiletime_warning(message) -#endif -#ifndef __compiletime_error -# define __compiletime_error(message) -#endif #ifdef __OPTIMIZE__ # define __compiletime_assert(condition, msg, prefix, suffix) \ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 94a578a96202..9cf51e41e697 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -143,12 +143,6 @@ static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ -/* Wrappers which go away once all code is converted */ -static inline void cpu_hotplug_begin(void) { cpus_write_lock(); } -static inline void cpu_hotplug_done(void) { cpus_write_unlock(); } -static inline void get_online_cpus(void) { cpus_read_lock(); } -static inline void put_online_cpus(void) { cpus_read_unlock(); } - #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 39cf84a30b9f..832d8a74fa59 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -22,8 +22,42 @@ * AP_ACTIVE AP_ACTIVE */ +/* + * CPU hotplug states. The state machine invokes the installed state + * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE + * during a CPU online operation. During a CPU offline operation the + * installed teardown callbacks are invoked in the reverse order from + * CPU_ONLINE - 1 down to CPUHP_OFFLINE. + * + * The state space has three sections: PREPARE, STARTING and ONLINE. + * + * PREPARE: The callbacks are invoked on a control CPU before the + * hotplugged CPU is started up or after the hotplugged CPU has died. + * + * STARTING: The callbacks are invoked on the hotplugged CPU from the low level + * hotplug startup/teardown code with interrupts disabled. + * + * ONLINE: The callbacks are invoked on the hotplugged CPU from the per CPU + * hotplug thread with interrupts and preemption enabled. + * + * Adding explicit states to this enum is only necessary when: + * + * 1) The state is within the STARTING section + * + * 2) The state has ordering constraints vs. other states in the + * same section. + * + * If neither #1 nor #2 apply, please use the dynamic state space when + * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE + * for the @state argument of the setup function. + * + * See Documentation/core-api/cpu_hotplug.rst for further information and + * examples. + */ enum cpuhp_state { CPUHP_INVALID = -1, + + /* PREPARE section invoked on a control CPU */ CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, CPUHP_PERF_PREPARE, @@ -95,6 +129,11 @@ enum cpuhp_state { CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, + + /* + * STARTING section invoked on the hotplugged CPU in low level + * bringup and teardown code. + */ CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, CPUHP_AP_SCHED_STARTING, @@ -155,6 +194,8 @@ enum cpuhp_state { CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, + + /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, @@ -216,14 +257,15 @@ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, int (*teardown)(unsigned int cpu), bool multi_instance); /** - * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks + * cpuhp_setup_state - Setup hotplug state callbacks with calling the @startup + * callback * @state: The state for which the calls are installed * @name: Name of the callback (will be used in debug output) - * @startup: startup callback function - * @teardown: teardown callback function + * @startup: startup callback function or NULL if not required + * @teardown: teardown callback function or NULL if not required * - * Installs the callback functions and invokes the startup callback on - * the present cpus which have already reached the @state. + * Installs the callback functions and invokes the @startup callback on + * the online cpus which have already reached the @state. */ static inline int cpuhp_setup_state(enum cpuhp_state state, const char *name, @@ -233,6 +275,18 @@ static inline int cpuhp_setup_state(enum cpuhp_state state, return __cpuhp_setup_state(state, name, true, startup, teardown, false); } +/** + * cpuhp_setup_state_cpuslocked - Setup hotplug state callbacks with calling + * @startup callback from a cpus_read_lock() + * held region + * @state: The state for which the calls are installed + * @name: Name of the callback (will be used in debug output) + * @startup: startup callback function or NULL if not required + * @teardown: teardown callback function or NULL if not required + * + * Same as cpuhp_setup_state() except that it must be invoked from within a + * cpus_read_lock() held region. + */ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu), @@ -244,14 +298,14 @@ static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state, /** * cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the - * callbacks + * @startup callback * @state: The state for which the calls are installed * @name: Name of the callback. - * @startup: startup callback function - * @teardown: teardown callback function + * @startup: startup callback function or NULL if not required + * @teardown: teardown callback function or NULL if not required * - * Same as @cpuhp_setup_state except that no calls are executed are invoked - * during installation of this callback. NOP if SMP=n or HOTPLUG_CPU=n. + * Same as cpuhp_setup_state() except that the @startup callback is not + * invoked during installation. NOP if SMP=n or HOTPLUG_CPU=n. */ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, const char *name, @@ -262,6 +316,19 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state, false); } +/** + * cpuhp_setup_state_nocalls_cpuslocked - Setup hotplug state callbacks without + * invoking the @startup callback from + * a cpus_read_lock() held region + * callbacks + * @state: The state for which the calls are installed + * @name: Name of the callback. + * @startup: startup callback function or NULL if not required + * @teardown: teardown callback function or NULL if not required + * + * Same as cpuhp_setup_state_nocalls() except that it must be invoked from + * within a cpus_read_lock() held region. + */ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu), @@ -275,13 +342,13 @@ static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state, * cpuhp_setup_state_multi - Add callbacks for multi state * @state: The state for which the calls are installed * @name: Name of the callback. - * @startup: startup callback function - * @teardown: teardown callback function + * @startup: startup callback function or NULL if not required + * @teardown: teardown callback function or NULL if not required * * Sets the internal multi_instance flag and prepares a state to work as a multi * instance callback. No callbacks are invoked at this point. The callbacks are * invoked once an instance for this state are registered via - * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls. + * cpuhp_state_add_instance() or cpuhp_state_add_instance_nocalls() */ static inline int cpuhp_setup_state_multi(enum cpuhp_state state, const char *name, @@ -306,9 +373,10 @@ int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, * @state: The state for which the instance is installed * @node: The node for this individual state. * - * Installs the instance for the @state and invokes the startup callback on - * the present cpus which have already reached the @state. The @state must have - * been earlier marked as multi-instance by @cpuhp_setup_state_multi. + * Installs the instance for the @state and invokes the registered startup + * callback on the online cpus which have already reached the @state. The + * @state must have been earlier marked as multi-instance by + * cpuhp_setup_state_multi(). */ static inline int cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node) @@ -322,8 +390,9 @@ static inline int cpuhp_state_add_instance(enum cpuhp_state state, * @state: The state for which the instance is installed * @node: The node for this individual state. * - * Installs the instance for the @state The @state must have been earlier - * marked as multi-instance by @cpuhp_setup_state_multi. + * Installs the instance for the @state. The @state must have been earlier + * marked as multi-instance by cpuhp_setup_state_multi. NOP if SMP=n or + * HOTPLUG_CPU=n. */ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state, struct hlist_node *node) @@ -331,6 +400,17 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state, return __cpuhp_state_add_instance(state, node, false); } +/** + * cpuhp_state_add_instance_nocalls_cpuslocked - Add an instance for a state + * without invoking the startup + * callback from a cpus_read_lock() + * held region. + * @state: The state for which the instance is installed + * @node: The node for this individual state. + * + * Same as cpuhp_state_add_instance_nocalls() except that it must be + * invoked from within a cpus_read_lock() held region. + */ static inline int cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state, struct hlist_node *node) @@ -346,7 +426,7 @@ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke); * @state: The state for which the calls are removed * * Removes the callback functions and invokes the teardown callback on - * the present cpus which have already reached the @state. + * the online cpus which have already reached the @state. */ static inline void cpuhp_remove_state(enum cpuhp_state state) { @@ -355,7 +435,7 @@ static inline void cpuhp_remove_state(enum cpuhp_state state) /** * cpuhp_remove_state_nocalls - Remove hotplug state callbacks without invoking - * teardown + * the teardown callback * @state: The state for which the calls are removed */ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) @@ -363,6 +443,14 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state) __cpuhp_remove_state(state, false); } +/** + * cpuhp_remove_state_nocalls_cpuslocked - Remove hotplug state callbacks without invoking + * teardown from a cpus_read_lock() held region. + * @state: The state for which the calls are removed + * + * Same as cpuhp_remove_state nocalls() except that it must be invoked + * from within a cpus_read_lock() held region. + */ static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state) { __cpuhp_remove_state_cpuslocked(state, false); @@ -390,8 +478,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, * @state: The state from which the instance is removed * @node: The node for this individual state. * - * Removes the instance and invokes the teardown callback on the present cpus - * which have already reached the @state. + * Removes the instance and invokes the teardown callback on the online cpus + * which have already reached @state. */ static inline int cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node) diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 1834752c5617..39dcadd492b5 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -11,7 +11,7 @@ #include <linux/types.h> /** - * em_perf_state - Performance state of a performance domain + * struct em_perf_state - Performance state of a performance domain * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -25,7 +25,7 @@ struct em_perf_state { }; /** - * em_perf_domain - Performance domain + * struct em_perf_domain - Performance domain * @table: List of performance states, in ascending order * @nr_perf_states: Number of performance states * @milliwatts: Flag indicating the power values are in milli-Watts @@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev); /** * em_cpu_energy() - Estimates the energy consumed by the CPUs of a - performance domain + * performance domain * @pd : performance domain for which energy has to be estimated * @max_util : highest utilization among CPUs of the domain * @sum_util : sum of the utilization of all CPUs in the domain * @allowed_cpu_cap : maximum allowed CPU capacity for the @pd, which - might reflect reduced frequency (due to thermal) + * might reflect reduced frequency (due to thermal) * * This function must be used only for CPU devices. There is no validation, * i.e. if the EM is a CPU type and has cpumask allocated. It is called from diff --git a/include/linux/file.h b/include/linux/file.h index 2de2e4613d7b..51e830b4fe3a 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -94,6 +94,9 @@ extern void fd_install(unsigned int fd, struct file *file); extern int __receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); + +extern int receive_fd(struct file *file, unsigned int o_flags); + static inline int receive_fd_user(struct file *file, int __user *ufd, unsigned int o_flags) { @@ -101,10 +104,6 @@ static inline int receive_fd_user(struct file *file, int __user *ufd, return -EFAULT; return __receive_fd(file, ufd, o_flags); } -static inline int receive_fd(struct file *file, unsigned int o_flags) -{ - return __receive_fd(file, NULL, o_flags); -} int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags); extern void flush_delayed_fput(void); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b066024c62e3..34de69b3b8ba 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); +void memblock_free_ptr(void *ptr, size_t size); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index b179f1e3541a..96e113e23d04 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } -static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) -{ - if (mmap_read_trylock(mm)) { - rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); - return true; - } - return false; -} - static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0f12345c21fb..4669632bd72b 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -6,12 +6,9 @@ #include <linux/limits.h> /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) return unlikely(overflow); } -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow) __builtin_mul_overflow(__a, __b, __d); \ })) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d))) - -#define check_sub_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d))) - -#define check_mul_overflow(a, b, d) __must_check_overflow( \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d))) - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** check_shl_overflow() - Calculate a left-shifted value and check overflow * * @a: Value to be shifted diff --git a/include/linux/pwm.h b/include/linux/pwm.h index a0b7e43049d5..725c9b784e60 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -404,7 +404,7 @@ int pwm_set_chip_data(struct pwm_device *pwm, void *data); void *pwm_get_chip_data(struct pwm_device *pwm); int pwmchip_add(struct pwm_chip *chip); -int pwmchip_remove(struct pwm_chip *chip); +void pwmchip_remove(struct pwm_chip *chip); int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip); diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 0165824c5128..c0475d1c9885 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -109,6 +109,12 @@ extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +extern int qcom_scm_lmh_profile_change(u32 profile_id); +extern bool qcom_scm_lmh_dcvsh_available(void); + #else #include <linux/errno.h> @@ -170,5 +176,13 @@ static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) { return -ENODEV; } + +static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version) + { return -ENODEV; } + +static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; } + +static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; } #endif #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 426e98e0b675..352c6127cb90 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -142,22 +142,14 @@ struct rw_semaphore { #define DECLARE_RWSEM(lockname) \ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, +extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name, struct lock_class_key *key); -#else -static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name, - struct lock_class_key *key) -{ -} -#endif #define init_rwsem(sem) \ do { \ static struct lock_class_key __key; \ \ - init_rwbase_rt(&(sem)->rwbase); \ - __rwsem_init((sem), #sem, &__key); \ + __init_rwsem((sem), #sem, &__key); \ } while (0) static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) diff --git a/include/linux/sched.h b/include/linux/sched.h index e12b524426b0..39039ce8ac4c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1471,6 +1471,7 @@ struct task_struct { mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; + int mce_count; #endif #ifdef CONFIG_KRETPROBES diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bdb0db3e825..841e2f0f5240 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk, WRITE_ONCE(newsk->prev, prev); WRITE_ONCE(next->prev, newsk); WRITE_ONCE(prev->next, newsk); - list->qlen++; + WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, diff --git a/include/linux/thermal.h b/include/linux/thermal.h index d296f3b88fb9..c314893970b3 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -285,7 +285,7 @@ struct thermal_zone_params { }; /** - * struct thermal_zone_of_device_ops - scallbacks for handling DT based zones + * struct thermal_zone_of_device_ops - callbacks for handling DT based zones * * Mandatory: * @get_temp: a pointer to a function that reads the sensor temperature. @@ -404,12 +404,13 @@ static inline void thermal_zone_device_unregister( struct thermal_zone_device *tz) { } static inline struct thermal_cooling_device * -thermal_cooling_device_register(char *type, void *devdata, +thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * thermal_of_cooling_device_register(struct device_node *np, - char *type, void *devdata, const struct thermal_cooling_device_ops *ops) + const char *type, void *devdata, + const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); } static inline struct thermal_cooling_device * devm_thermal_of_cooling_device_register(struct device *dev, diff --git a/include/linux/time64.h b/include/linux/time64.h index 5117cb5b5656..81b9686a2079 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -25,7 +25,9 @@ struct itimerspec64 { #define TIME64_MIN (-TIME64_MAX - 1) #define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_MIN (-KTIME_MAX - 1) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +#define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC) /* * Limits for settimeofday(): @@ -124,10 +126,13 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { - /* Prevent multiplication overflow */ - if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + /* Prevent multiplication overflow / underflow */ + if (ts->tv_sec >= KTIME_SEC_MAX) return KTIME_MAX; + if (ts->tv_sec <= KTIME_SEC_MIN) + return KTIME_MIN; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 5265024e8b90..207101a9c5c3 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -27,6 +27,12 @@ enum iter_type { ITER_DISCARD, }; +struct iov_iter_state { + size_t iov_offset; + size_t count; + unsigned long nr_segs; +}; + struct iov_iter { u8 iter_type; bool data_source; @@ -47,7 +53,6 @@ struct iov_iter { }; loff_t xarray_start; }; - size_t truncated; }; static inline enum iter_type iov_iter_type(const struct iov_iter *i) @@ -55,6 +60,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i) return i->iter_type; } +static inline void iov_iter_save_state(struct iov_iter *iter, + struct iov_iter_state *state) +{ + state->iov_offset = iter->iov_offset; + state->count = iter->count; + state->nr_segs = iter->nr_segs; +} + static inline bool iter_is_iovec(const struct iov_iter *i) { return iov_iter_type(i) == ITER_IOVEC; @@ -233,6 +246,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state); const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); @@ -255,10 +269,8 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) * conversion in assignement is by definition greater than all * values of size_t, including old i->count. */ - if (i->count > count) { - i->truncated += i->count - count; + if (i->count > count) i->count = count; - } } /* @@ -267,7 +279,6 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count) */ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { - i->truncated -= count - i->count; i->count = count; } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 8cfe49d201dd..3972ab765de1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -43,17 +43,17 @@ struct vdpa_vq_state_split { * @last_used_idx: used index */ struct vdpa_vq_state_packed { - u16 last_avail_counter:1; - u16 last_avail_idx:15; - u16 last_used_counter:1; - u16 last_used_idx:15; + u16 last_avail_counter:1; + u16 last_avail_idx:15; + u16 last_used_counter:1; + u16 last_used_idx:15; }; struct vdpa_vq_state { - union { - struct vdpa_vq_state_split split; - struct vdpa_vq_state_packed packed; - }; + union { + struct vdpa_vq_state_split split; + struct vdpa_vq_state_packed packed; + }; }; struct vdpa_mgmt_dev; @@ -65,6 +65,7 @@ struct vdpa_mgmt_dev; * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @use_va: indicate whether virtual address must be used by this device * @nvqs: maximum number of supported virtqueues * @mdev: management device pointer; caller must setup when registering device as part * of dev_add() mgmtdev ops callback before invoking _vdpa_register_device(). @@ -75,6 +76,7 @@ struct vdpa_device { const struct vdpa_config_ops *config; unsigned int index; bool features_valid; + bool use_va; int nvqs; struct vdpa_mgmt_dev *mdev; }; @@ -90,6 +92,16 @@ struct vdpa_iova_range { }; /** + * Corresponding file area for device memory mapping + * @file: vma->vm_file for the mapping + * @offset: mapping offset in the vm_file + */ +struct vdpa_map_file { + struct file *file; + u64 offset; +}; + +/** * struct vdpa_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the * operations unless it is mentioned to be optional in the following @@ -131,7 +143,7 @@ struct vdpa_iova_range { * @vdev: vdpa device * @idx: virtqueue index * @state: pointer to returned state (last_avail_idx) - * @get_vq_notification: Get the notification area for a virtqueue + * @get_vq_notification: Get the notification area for a virtqueue * @vdev: vdpa device * @idx: virtqueue index * Returns the notifcation area @@ -171,6 +183,9 @@ struct vdpa_iova_range { * @set_status: Set the device status * @vdev: vdpa device * @status: virtio device status + * @reset: Reset device + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space * @vdev: vdpa device * Returns size_t: configuration size @@ -255,6 +270,7 @@ struct vdpa_config_ops { u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); + int (*reset)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); @@ -266,7 +282,7 @@ struct vdpa_config_ops { /* DMA ops */ int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size, - u64 pa, u32 perm); + u64 pa, u32 perm, void *opaque); int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size); /* Free device resources */ @@ -275,7 +291,8 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - size_t size, const char *name); + size_t size, const char *name, + bool use_va); /** * vdpa_alloc_device - allocate and initilaize a vDPA device @@ -285,15 +302,16 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, * @parent: the parent device * @config: the bus operations that is supported by this device * @name: name of the vdpa device + * @use_va: indicate whether virtual address must be used by this device * * Return allocated data structure or ERR_PTR upon error */ -#define vdpa_alloc_device(dev_struct, member, parent, config, name) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \ container_of(__vdpa_alloc_device( \ parent, config, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ - dev_struct, member)), name), \ + dev_struct, member)), name, use_va), \ dev_struct, member) int vdpa_register_device(struct vdpa_device *vdev, int nvqs); @@ -348,27 +366,27 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; } -static inline void vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev) { - const struct vdpa_config_ops *ops = vdev->config; + const struct vdpa_config_ops *ops = vdev->config; vdev->features_valid = false; - ops->set_status(vdev, 0); + return ops->reset(vdev); } static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) { - const struct vdpa_config_ops *ops = vdev->config; + const struct vdpa_config_ops *ops = vdev->config; vdev->features_valid = true; - return ops->set_features(vdev, features); + return ops->set_features(vdev, features); } - -static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset, - void *buf, unsigned int len) +static inline void vdpa_get_config(struct vdpa_device *vdev, + unsigned int offset, void *buf, + unsigned int len) { - const struct vdpa_config_ops *ops = vdev->config; + const struct vdpa_config_ops *ops = vdev->config; /* * Config accesses aren't supposed to trigger before features are set. diff --git a/include/linux/vhost_iotlb.h b/include/linux/vhost_iotlb.h index 6b09b786a762..2d0e2f52f938 100644 --- a/include/linux/vhost_iotlb.h +++ b/include/linux/vhost_iotlb.h @@ -17,6 +17,7 @@ struct vhost_iotlb_map { u32 perm; u32 flags_padding; u64 __subtree_last; + void *opaque; }; #define VHOST_IOTLB_FLAG_RETIRE 0x1 @@ -29,6 +30,8 @@ struct vhost_iotlb { unsigned int flags; }; +int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last, + u64 addr, unsigned int perm, void *opaque); int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last, u64 addr, unsigned int perm); void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last); diff --git a/include/net/dsa.h b/include/net/dsa.h index f9a17145255a..258867eff230 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -447,6 +447,11 @@ static inline bool dsa_port_is_user(struct dsa_port *dp) return dp->type == DSA_PORT_TYPE_USER; } +static inline bool dsa_port_is_unused(struct dsa_port *dp) +{ + return dp->type == DSA_PORT_TYPE_UNUSED; +} + static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h index 69829205fdb5..8e87d27b0951 100644 --- a/include/uapi/linux/cifs/cifs_mount.h +++ b/include/uapi/linux/cifs/cifs_mount.h @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */ /* - * include/uapi/linux/cifs/cifs_mount.h * * Author(s): Scott Lovenberg (scott.lovenberg@gmail.com) * diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 59ef35154e3d..b270a07b285e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -317,13 +317,19 @@ enum { IORING_REGISTER_IOWQ_AFF = 17, IORING_UNREGISTER_IOWQ_AFF = 18, - /* set/get max number of workers */ + /* set/get max number of io-wq workers */ IORING_REGISTER_IOWQ_MAX_WORKERS = 19, /* this goes last */ IORING_REGISTER_LAST }; +/* io-wq worker categories */ +enum { + IO_WQ_BOUND, + IO_WQ_UNBOUND, +}; + /* deprecated, see struct io_uring_rsrc_update */ struct io_uring_files_update { __u32 offset; diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h new file mode 100644 index 000000000000..7cfe1c1280c0 --- /dev/null +++ b/include/uapi/linux/vduse.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_VDUSE_H_ +#define _UAPI_VDUSE_H_ + +#include <linux/types.h> + +#define VDUSE_BASE 0x81 + +/* The ioctls for control device (/dev/vduse/control) */ + +#define VDUSE_API_VERSION 0 + +/* + * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION). + * This is used for future extension. + */ +#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64) + +/* Set the version of VDUSE API that userspace supported. */ +#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64) + +/** + * struct vduse_dev_config - basic configuration of a VDUSE device + * @name: VDUSE device name, needs to be NUL terminated + * @vendor_id: virtio vendor id + * @device_id: virtio device id + * @features: virtio features + * @vq_num: the number of virtqueues + * @vq_align: the allocation alignment of virtqueue's metadata + * @reserved: for future use, needs to be initialized to zero + * @config_size: the size of the configuration space + * @config: the buffer of the configuration space + * + * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device. + */ +struct vduse_dev_config { +#define VDUSE_NAME_MAX 256 + char name[VDUSE_NAME_MAX]; + __u32 vendor_id; + __u32 device_id; + __u64 features; + __u32 vq_num; + __u32 vq_align; + __u32 reserved[13]; + __u32 config_size; + __u8 config[]; +}; + +/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */ +#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config) + +/* + * Destroy a VDUSE device. Make sure there are no more references + * to the char device (/dev/vduse/$NAME). + */ +#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX]) + +/* The ioctls for VDUSE device (/dev/vduse/$NAME) */ + +/** + * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last] + * @offset: the mmap offset on returned file descriptor + * @start: start of the IOVA region + * @last: last of the IOVA region + * @perm: access permission of the IOVA region + * + * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region. + */ +struct vduse_iotlb_entry { + __u64 offset; + __u64 start; + __u64 last; +#define VDUSE_ACCESS_RO 0x1 +#define VDUSE_ACCESS_WO 0x2 +#define VDUSE_ACCESS_RW 0x3 + __u8 perm; +}; + +/* + * Find the first IOVA region that overlaps with the range [start, last] + * and return the corresponding file descriptor. Return -EINVAL means the + * IOVA region doesn't exist. Caller should set start and last fields. + */ +#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry) + +/* + * Get the negotiated virtio features. It's a subset of the features in + * struct vduse_dev_config which can be accepted by virtio driver. It's + * only valid after FEATURES_OK status bit is set. + */ +#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64) + +/** + * struct vduse_config_data - data used to update configuration space + * @offset: the offset from the beginning of configuration space + * @length: the length to write to configuration space + * @buffer: the buffer used to write from + * + * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device + * configuration space. + */ +struct vduse_config_data { + __u32 offset; + __u32 length; + __u8 buffer[]; +}; + +/* Set device configuration space */ +#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data) + +/* + * Inject a config interrupt. It's usually used to notify virtio driver + * that device configuration space has changed. + */ +#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13) + +/** + * struct vduse_vq_config - basic configuration of a virtqueue + * @index: virtqueue index + * @max_size: the max size of virtqueue + * @reserved: for future use, needs to be initialized to zero + * + * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue. + */ +struct vduse_vq_config { + __u32 index; + __u16 max_size; + __u16 reserved[13]; +}; + +/* + * Setup the specified virtqueue. Make sure all virtqueues have been + * configured before the device is attached to vDPA bus. + */ +#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config) + +/** + * struct vduse_vq_state_split - split virtqueue state + * @avail_index: available index + */ +struct vduse_vq_state_split { + __u16 avail_index; +}; + +/** + * struct vduse_vq_state_packed - packed virtqueue state + * @last_avail_counter: last driver ring wrap counter observed by device + * @last_avail_idx: device available index + * @last_used_counter: device ring wrap counter + * @last_used_idx: used index + */ +struct vduse_vq_state_packed { + __u16 last_avail_counter; + __u16 last_avail_idx; + __u16 last_used_counter; + __u16 last_used_idx; +}; + +/** + * struct vduse_vq_info - information of a virtqueue + * @index: virtqueue index + * @num: the size of virtqueue + * @desc_addr: address of desc area + * @driver_addr: address of driver area + * @device_addr: address of device area + * @split: split virtqueue state + * @packed: packed virtqueue state + * @ready: ready status of virtqueue + * + * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information. + */ +struct vduse_vq_info { + __u32 index; + __u32 num; + __u64 desc_addr; + __u64 driver_addr; + __u64 device_addr; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; + __u8 ready; +}; + +/* Get the specified virtqueue's information. Caller should set index field. */ +#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info) + +/** + * struct vduse_vq_eventfd - eventfd configuration for a virtqueue + * @index: virtqueue index + * @fd: eventfd, -1 means de-assigning the eventfd + * + * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd. + */ +struct vduse_vq_eventfd { + __u32 index; +#define VDUSE_EVENTFD_DEASSIGN -1 + int fd; +}; + +/* + * Setup kick eventfd for specified virtqueue. The kick eventfd is used + * by VDUSE kernel module to notify userspace to consume the avail vring. + */ +#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd) + +/* + * Inject an interrupt for specific virtqueue. It's used to notify virtio driver + * to consume the used vring. + */ +#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32) + +/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */ + +/** + * enum vduse_req_type - request type + * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace + * @VDUSE_SET_STATUS: set the device status + * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for + * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl + */ +enum vduse_req_type { + VDUSE_GET_VQ_STATE, + VDUSE_SET_STATUS, + VDUSE_UPDATE_IOTLB, +}; + +/** + * struct vduse_vq_state - virtqueue state + * @index: virtqueue index + * @split: split virtqueue state + * @packed: packed virtqueue state + */ +struct vduse_vq_state { + __u32 index; + union { + struct vduse_vq_state_split split; + struct vduse_vq_state_packed packed; + }; +}; + +/** + * struct vduse_dev_status - device status + * @status: device status + */ +struct vduse_dev_status { + __u8 status; +}; + +/** + * struct vduse_iova_range - IOVA range [start, last] + * @start: start of the IOVA range + * @last: last of the IOVA range + */ +struct vduse_iova_range { + __u64 start; + __u64 last; +}; + +/** + * struct vduse_dev_request - control request + * @type: request type + * @request_id: request id + * @reserved: for future use + * @vq_state: virtqueue state, only index field is available + * @s: device status + * @iova: IOVA range for updating + * @padding: padding + * + * Structure used by read(2) on /dev/vduse/$NAME. + */ +struct vduse_dev_request { + __u32 type; + __u32 request_id; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + struct vduse_dev_status s; + struct vduse_iova_range iova; + __u32 padding[32]; + }; +}; + +/** + * struct vduse_dev_response - response to control request + * @request_id: corresponding request id + * @result: the result of request + * @reserved: for future use, needs to be initialized to zero + * @vq_state: virtqueue state + * @padding: padding + * + * Structure used by write(2) on /dev/vduse/$NAME. + */ +struct vduse_dev_response { + __u32 request_id; +#define VDUSE_REQ_RESULT_OK 0x00 +#define VDUSE_REQ_RESULT_FAILED 0x01 + __u32 result; + __u32 reserved[4]; + union { + struct vduse_vq_state vq_state; + __u32 padding[32]; + }; +}; + +#endif /* _UAPI_VDUSE_H_ */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 50d352f5e86f..80d76b75bccd 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -54,9 +54,18 @@ #define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_RPMB 28 /* virtio rpmb */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */ +#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */ #define VIRTIO_ID_SCMI 32 /* virtio SCMI */ +#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/ #define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ +#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */ +#define VIRTIO_ID_CAN 36 /* virtio can */ +#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */ +#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */ +#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ #define VIRTIO_ID_BT 40 /* virtio bluetooth */ #define VIRTIO_ID_GPIO 41 /* virtio gpio */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 3dd3555b2740..64738838bee5 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -97,7 +97,8 @@ enum virtio_vsock_shutdown { /* VIRTIO_VSOCK_OP_RW flags values */ enum virtio_vsock_rw { - VIRTIO_VSOCK_SEQ_EOR = 1, + VIRTIO_VSOCK_SEQ_EOM = 1, + VIRTIO_VSOCK_SEQ_EOR = 2, }; #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47a731e4527..7cc2a0f3f2f5 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -276,7 +276,17 @@ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, HL_DEVICE_STATUS_MALFUNCTION, - HL_DEVICE_STATUS_NEEDS_RESET + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION +}; + +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4 }; /* Opcode for management ioctl @@ -337,17 +347,49 @@ enum hl_device_status { #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters. + * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @dram_page_size: The DRAM physical page size. + */ struct hl_info_hw_ip_info { __u64 sram_base_address; __u64 dram_base_address; __u64 dram_size; __u32 sram_size; __u32 num_of_events; - __u32 device_id; /* PCI Device ID */ - __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ + __u32 device_id; + __u32 module_id; __u32 reserved; __u16 first_available_interrupt_id; - __u16 reserved2; + __u16 server_type; __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; @@ -358,7 +400,7 @@ struct hl_info_hw_ip_info { __u8 pad[2]; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved3; + __u64 reserved2; __u64 dram_page_size; }; @@ -628,12 +670,21 @@ struct hl_cs_chunk { __u64 cb_handle; /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * HL_CS_FLAGS_COLLECTIVE_WAIT is set * This holds address of array of u64 values that contain - * signal CS sequence numbers. The wait described by this job - * will listen on all those signals (wait event per signal) + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) */ __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. + */ + __u64 encaps_signal_seq; }; /* Index of queue to put the CB on */ @@ -651,6 +702,17 @@ struct hl_cs_chunk { * Number of entries in signal_seq_arr */ __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This set the signals range that the user want to wait for + * out of the whole reserved signals range. + * e.g if the signals range is 20, and user don't want + * to wait for signal 8, so he set this offset to 7, then + * he call the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; }; /* HL_CS_CHUNK_FLAGS_* */ @@ -678,6 +740,28 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature need to follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE. + * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing it's value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 @@ -690,10 +774,35 @@ struct hl_cs_in { /* holds address of array of hl_cs_chunk for execution phase */ __u64 chunks_execute; - /* Sequence number of a staged submission CS - * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set - */ - __u64 seq; + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; /* Number of chunks in restore phase array. Maximum number is * HL_MAX_JOBS_PER_CS @@ -718,14 +827,31 @@ struct hl_cs_in { }; struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the resereved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + /* - * seq holds the sequence number of the CS to pass to wait ioctl. All - * values are valid except for 0 and ULLONG_MAX + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ - __u64 seq; - /* HL_CS_STATUS_* */ - __u32 status; - __u32 pad; + __u32 sob_base_addr_offset; }; union hl_cs_args { @@ -735,11 +861,18 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 struct hl_wait_cs_in { union { struct { - /* Command submission sequence number */ + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ __u64 seq; /* Absolute timeout to wait for command submission * in microseconds @@ -767,12 +900,17 @@ struct hl_wait_cs_in { /* Context ID - Currently not in use */ __u32 ctx_id; + /* HL_WAIT_CS_FLAGS_* * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order * not to specify an interrupt id ,set mask to all 1s. */ __u32 flags; + + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -789,8 +927,15 @@ struct hl_wait_cs_out { __u32 status; /* HL_WAIT_CS_STATUS_FLAG* */ __u32 flags; - /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */ + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; }; union hl_wait_cs_args { @@ -813,6 +958,7 @@ union hl_wait_cs_args { #define HL_MEM_CONTIGUOUS 0x1 #define HL_MEM_SHARED 0x2 #define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 struct hl_mem_in { union { diff --git a/init/main.c b/init/main.c index 5c9a48df90e1..3f7216934441 100644 --- a/init/main.c +++ b/init/main.c @@ -924,7 +924,7 @@ static void __init print_unknown_bootoptions(void) end += sprintf(end, " %s", *p); pr_notice("Unknown command line parameters:%s\n", unknown_options); - memblock_free(__pa(unknown_options), len); + memblock_free_ptr(unknown_options, len); } asmlinkage __visible void __init __no_sanitize_address start_kernel(void) diff --git a/ipc/sem.c b/ipc/sem.c index f833238df1ce..6693daf4fe11 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2238,7 +2238,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, return -EINVAL; if (nsops > SEMOPM_FAST) { - sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL_ACCOUNT); + sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); if (sops == NULL) return -ENOMEM; } diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index ca3cd9aaa6ce..7b4afb7d96db 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h index e546b18d27da..a4b040793f44 100644 --- a/kernel/bpf/disasm.h +++ b/kernel/bpf/disasm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2016 Facebook */ diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index e8eefdf8cf3e..09a3fd97d329 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, * with build_id. */ if (!user || !current || !current->mm || irq_work_busy || - !mmap_read_trylock_non_owner(current->mm)) { + !mmap_read_trylock(current->mm)) { /* cannot access current->mm, fall back to ips */ for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; @@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } if (!work) { - mmap_read_unlock_non_owner(current->mm); + mmap_read_unlock(current->mm); } else { work->mm = current->mm; + + /* The lock will be released once we're out of interrupt + * context. Tell lockdep that we've released it now so + * it doesn't complain that we forgot to release it. + */ + rwsem_release(¤t->mm->mmap_lock.dep_map, _RET_IP_); irq_work_queue(&work->irq_work); } } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 047ac4b4703b..e76b55917905 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9912,6 +9912,8 @@ static int check_btf_line(struct bpf_verifier_env *env, nr_linfo = attr->line_info_cnt; if (!nr_linfo) return 0; + if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) + return -EINVAL; rec_size = attr->line_info_rec_size; if (rec_size < MIN_BPF_LINEINFO_SIZE || diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 881ce1470beb..8afa8690d288 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) */ #ifdef CONFIG_SOCK_CGROUP_DATA -#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) - -DEFINE_SPINLOCK(cgroup_sk_update_lock); -static bool cgroup_sk_alloc_disabled __read_mostly; - -void cgroup_sk_alloc_disable(void) -{ - if (cgroup_sk_alloc_disabled) - return; - pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n"); - cgroup_sk_alloc_disabled = true; -} - -#else - -#define cgroup_sk_alloc_disabled false - -#endif - void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { - if (cgroup_sk_alloc_disabled) { - skcd->no_refcnt = 1; - return; - } - /* Don't associate the sock with unrelated interrupted task's cgroup. */ if (in_interrupt()) return; rcu_read_lock(); - while (true) { struct css_set *cset; cset = task_css_set(current); if (likely(cgroup_tryget(cset->dfl_cgrp))) { - skcd->val = (unsigned long)cset->dfl_cgrp; + skcd->cgroup = cset->dfl_cgrp; cgroup_bpf_get(cset->dfl_cgrp); break; } cpu_relax(); } - rcu_read_unlock(); } void cgroup_sk_clone(struct sock_cgroup_data *skcd) { - if (skcd->val) { - if (skcd->no_refcnt) - return; - /* - * We might be cloning a socket which is left in an empty - * cgroup and the cgroup might have already been rmdir'd. - * Don't use cgroup_get_live(). - */ - cgroup_get(sock_cgroup_ptr(skcd)); - cgroup_bpf_get(sock_cgroup_ptr(skcd)); - } + struct cgroup *cgrp = sock_cgroup_ptr(skcd); + + /* + * We might be cloning a socket which is left in an empty + * cgroup and the cgroup might have already been rmdir'd. + * Don't use cgroup_get_live(). + */ + cgroup_get(cgrp); + cgroup_bpf_get(cgrp); } void cgroup_sk_free(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); - if (skcd->no_refcnt) - return; cgroup_bpf_put(cgrp); cgroup_put(cgrp); } diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 6c90c69e5311..95445bd6eb72 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -567,7 +567,8 @@ static void add_dma_entry(struct dma_debug_entry *entry) pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } else if (rc == -EEXIST) { - pr_err("cacheline tracking EEXIST, overlapping mappings aren't supported\n"); + err_printk(entry->dev, entry, + "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } } diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 7ee5284bff58..06fec5547e7c 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -206,7 +206,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, /** * dma_map_sg_attrs - Map the given buffer for DMA * @dev: The device for which to perform the DMA operation - * @sg: The sg_table object describing the buffer + * @sg: The sg_table object describing the buffer + * @nents: Number of entries to map * @dir: DMA direction * @attrs: Optional DMA attributes for the map operation * diff --git a/kernel/events/core.c b/kernel/events/core.c index 744e8726c5b2..0c000cb01eeb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10193,7 +10193,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) return; if (ifh->nr_file_filters) { - mm = get_task_mm(event->ctx->task); + mm = get_task_mm(task); if (!mm) goto restart; diff --git a/kernel/futex.c b/kernel/futex.c index e7b4c6121da4..c15ad276fd15 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, return -ESRCH; } +static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, + struct futex_pi_state **ps) +{ + /* + * No existing pi state. First waiter. [2] + * + * This creates pi_state, we have hb->lock held, this means nothing can + * observe this state, wait_lock is irrelevant. + */ + struct futex_pi_state *pi_state = alloc_pi_state(); + + /* + * Initialize the pi_mutex in locked state and make @p + * the owner of it: + */ + rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); + + /* Store the key for possible exit cleanups: */ + pi_state->key = *key; + + WARN_ON(!list_empty(&pi_state->list)); + list_add(&pi_state->list, &p->pi_state_list); + /* + * Assignment without holding pi_state->pi_mutex.wait_lock is safe + * because there is no concurrency as the object is not published yet. + */ + pi_state->owner = p; + + *ps = pi_state; +} /* * Lookup the task for the TID provided from user space and attach to * it after doing proper sanity checks. @@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, struct task_struct **exiting) { pid_t pid = uval & FUTEX_TID_MASK; - struct futex_pi_state *pi_state; struct task_struct *p; /* @@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, return ret; } - /* - * No existing pi state. First waiter. [2] - * - * This creates pi_state, we have hb->lock held, this means nothing can - * observe this state, wait_lock is irrelevant. - */ - pi_state = alloc_pi_state(); - - /* - * Initialize the pi_mutex in locked state and make @p - * the owner of it: - */ - rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); - - /* Store the key for possible exit cleanups: */ - pi_state->key = *key; - - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &p->pi_state_list); - /* - * Assignment without holding pi_state->pi_mutex.wait_lock is safe - * because there is no concurrency as the object is not published yet. - */ - pi_state->owner = p; + __attach_to_pi_owner(p, key, ps); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); - *ps = pi_state; - return 0; } @@ -1454,8 +1458,26 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, newval |= FUTEX_WAITERS; ret = lock_pi_update_atomic(uaddr, uval, newval); - /* If the take over worked, return 1 */ - return ret < 0 ? ret : 1; + if (ret) + return ret; + + /* + * If the waiter bit was requested the caller also needs PI + * state attached to the new owner of the user space futex. + * + * @task is guaranteed to be alive and it cannot be exiting + * because it is either sleeping or waiting in + * futex_requeue_pi_wakeup_sync(). + * + * No need to do the full attach_to_pi_owner() exercise + * because @task is known and valid. + */ + if (set_waiters) { + raw_spin_lock_irq(&task->pi_lock); + __attach_to_pi_owner(task, key, ps); + raw_spin_unlock_irq(&task->pi_lock); + } + return 1; } /* @@ -1939,12 +1961,26 @@ static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q) * @hb: the hash_bucket of the requeue target futex * * During futex_requeue, with requeue_pi=1, it is possible to acquire the - * target futex if it is uncontended or via a lock steal. Set the futex_q key - * to the requeue target futex so the waiter can detect the wakeup on the right - * futex, but remove it from the hb and NULL the rt_waiter so it can detect - * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock - * to protect access to the pi_state to fixup the owner later. Must be called - * with both q->lock_ptr and hb->lock held. + * target futex if it is uncontended or via a lock steal. + * + * 1) Set @q::key to the requeue target futex key so the waiter can detect + * the wakeup on the right futex. + * + * 2) Dequeue @q from the hash bucket. + * + * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock + * acquisition. + * + * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that + * the waiter has to fixup the pi state. + * + * 5) Complete the requeue state so the waiter can make progress. After + * this point the waiter task can return from the syscall immediately in + * case that the pi state does not have to be fixed up. + * + * 6) Wake the waiter task. + * + * Must be called with both q->lock_ptr and hb->lock held. */ static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, @@ -1998,7 +2034,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, { struct futex_q *top_waiter = NULL; u32 curval; - int ret, vpid; + int ret; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; @@ -2025,7 +2061,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, * and waiting on the 'waitqueue' futex which is always !PI. */ if (!top_waiter->rt_waiter || top_waiter->pi_state) - ret = -EINVAL; + return -EINVAL; /* Ensure we requeue to the expected futex. */ if (!match_futex(top_waiter->requeue_pi_key, key2)) @@ -2036,17 +2072,23 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, return -EAGAIN; /* - * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in - * the contended case or if set_waiters is 1. The pi_state is returned - * in ps in contended cases. + * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit + * in the contended case or if @set_waiters is true. + * + * In the contended case PI state is attached to the lock owner. If + * the user space lock can be acquired then PI state is attached to + * the new owner (@top_waiter->task) when @set_waiters is true. */ - vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, exiting, set_waiters); if (ret == 1) { - /* Dequeue, wake up and update top_waiter::requeue_state */ + /* + * Lock was acquired in user space and PI state was + * attached to @top_waiter->task. That means state is fully + * consistent and the waiter can return to user space + * immediately after the wakeup. + */ requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; } else if (ret < 0) { /* Rewind top_waiter::requeue_state */ futex_requeue_pi_complete(top_waiter, ret); @@ -2208,19 +2250,26 @@ retry_private: &exiting, nr_requeue); /* - * At this point the top_waiter has either taken uaddr2 or is - * waiting on it. If the former, then the pi_state will not - * exist yet, look it up one more time to ensure we have a - * reference to it. If the lock was taken, @ret contains the - * VPID of the top waiter task. - * If the lock was not taken, we have pi_state and an initial - * refcount on it. In case of an error we have nothing. + * At this point the top_waiter has either taken uaddr2 or + * is waiting on it. In both cases pi_state has been + * established and an initial refcount on it. In case of an + * error there's nothing. * * The top waiter's requeue_state is up to date: * - * - If the lock was acquired atomically (ret > 0), then + * - If the lock was acquired atomically (ret == 1), then * the state is Q_REQUEUE_PI_LOCKED. * + * The top waiter has been dequeued and woken up and can + * return to user space immediately. The kernel/user + * space state is consistent. In case that there must be + * more waiters requeued the WAITERS bit in the user + * space futex is set so the top waiter task has to go + * into the syscall slowpath to unlock the futex. This + * will block until this requeue operation has been + * completed and the hash bucket locks have been + * dropped. + * * - If the trylock failed with an error (ret < 0) then * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing * happened", or Q_REQUEUE_PI_IGNORE when there was an @@ -2234,36 +2283,20 @@ retry_private: * the same sanity checks for requeue_pi as the loop * below does. */ - if (ret > 0) { - WARN_ON(pi_state); - task_count++; - /* - * If futex_proxy_trylock_atomic() acquired the - * user space futex, then the user space value - * @uaddr2 has been set to the @hb1's top waiter - * task VPID. This task is guaranteed to be alive - * and cannot be exiting because it is either - * sleeping or blocked on @hb2 lock. - * - * The @uaddr2 futex cannot have waiters either as - * otherwise futex_proxy_trylock_atomic() would not - * have succeeded. - * - * In order to requeue waiters to @hb2, pi state is - * required. Hand in the VPID value (@ret) and - * allocate PI state with an initial refcount on - * it. - */ - ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state, - &exiting); - WARN_ON(ret); - } - switch (ret) { case 0: /* We hold a reference on the pi state. */ break; + case 1: + /* + * futex_proxy_trylock_atomic() acquired the user space + * futex. Adjust task_count. + */ + task_count++; + ret = 0; + break; + /* * If the above failed, then pi_state is NULL and * waiter::requeue_state is correct. @@ -2395,9 +2428,8 @@ retry_private: } /* - * We took an extra initial reference to the pi_state either in - * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need - * to drop it here again. + * We took an extra initial reference to the pi_state in + * futex_proxy_trylock_atomic(). We need to drop it here again. */ put_pi_state(pi_state); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 8eabdc79602b..6bb116c559b4 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -753,7 +753,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, * other configuration and we fail to report; also, see * lockdep. */ - if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx) + if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx) ret = 0; raw_spin_unlock(&lock->wait_lock); diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 4ba15088e640..88191f6e252c 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -41,6 +41,12 @@ * The risk of writer starvation is there, but the pathological use cases * which trigger it are not necessarily the typical RT workloads. * + * Fast-path orderings: + * The lock/unlock of readers can run in fast paths: lock and unlock are only + * atomic ops, and there is no inner lock to provide ACQUIRE and RELEASE + * semantics of rwbase_rt. Atomic ops should thus provide _acquire() + * and _release() (or stronger). + * * Common code shared between RT rw_semaphore and rwlock */ @@ -53,6 +59,7 @@ static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb) * set. */ for (r = atomic_read(&rwb->readers); r < 0;) { + /* Fully-ordered if cmpxchg() succeeds, provides ACQUIRE */ if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1))) return 1; } @@ -162,6 +169,8 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, /* * rwb->readers can only hit 0 when a writer is waiting for the * active readers to leave the critical section. + * + * dec_and_test() is fully ordered, provides RELEASE. */ if (unlikely(atomic_dec_and_test(&rwb->readers))) __rwbase_read_unlock(rwb, state); @@ -172,7 +181,11 @@ static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, { struct rt_mutex_base *rtm = &rwb->rtmutex; - atomic_add(READER_BIAS - bias, &rwb->readers); + /* + * _release() is needed in case that reader is in fast path, pairing + * with atomic_try_cmpxchg() in rwbase_read_trylock(), provides RELEASE + */ + (void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers); raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); rwbase_rtmutex_unlock(rtm); } @@ -196,6 +209,23 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); } +static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb) +{ + /* Can do without CAS because we're serialized by wait_lock. */ + lockdep_assert_held(&rwb->rtmutex.wait_lock); + + /* + * _acquire is needed in case the reader is in the fast path, pairing + * with rwbase_read_unlock(), provides ACQUIRE. + */ + if (!atomic_read_acquire(&rwb->readers)) { + atomic_set(&rwb->readers, WRITER_BIAS); + return 1; + } + + return 0; +} + static int __sched rwbase_write_lock(struct rwbase_rt *rwb, unsigned int state) { @@ -210,34 +240,30 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, atomic_sub(READER_BIAS, &rwb->readers); raw_spin_lock_irqsave(&rtm->wait_lock, flags); - /* - * set_current_state() for rw_semaphore - * current_save_and_set_rtlock_wait_state() for rwlock - */ - rwbase_set_and_save_current_state(state); + if (__rwbase_write_trylock(rwb)) + goto out_unlock; - /* Block until all readers have left the critical section. */ - for (; atomic_read(&rwb->readers);) { + rwbase_set_and_save_current_state(state); + for (;;) { /* Optimized out for rwlocks */ if (rwbase_signal_pending_state(state, current)) { - __set_current_state(TASK_RUNNING); + rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); return -EINTR; } + + if (__rwbase_write_trylock(rwb)) + break; + raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); + rwbase_schedule(); + raw_spin_lock_irqsave(&rtm->wait_lock, flags); - /* - * Schedule and wait for the readers to leave the critical - * section. The last reader leaving it wakes the waiter. - */ - if (atomic_read(&rwb->readers) != 0) - rwbase_schedule(); set_current_state(state); - raw_spin_lock_irqsave(&rtm->wait_lock, flags); } - - atomic_set(&rwb->readers, WRITER_BIAS); rwbase_restore_current_state(); + +out_unlock: raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); return 0; } @@ -253,8 +279,7 @@ static inline int rwbase_write_trylock(struct rwbase_rt *rwb) atomic_sub(READER_BIAS, &rwb->readers); raw_spin_lock_irqsave(&rtm->wait_lock, flags); - if (!atomic_read(&rwb->readers)) { - atomic_set(&rwb->readers, WRITER_BIAS); + if (__rwbase_write_trylock(rwb)) { raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); return 1; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 9215b4d6a9de..000e8d5a2884 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1376,15 +1376,17 @@ static inline void __downgrade_write(struct rw_semaphore *sem) #include "rwbase_rt.c" -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void __rwsem_init(struct rw_semaphore *sem, const char *name, +void __init_rwsem(struct rw_semaphore *sem, const char *name, struct lock_class_key *key) { + init_rwbase_rt(&(sem)->rwbase); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); -} -EXPORT_SYMBOL(__rwsem_init); #endif +} +EXPORT_SYMBOL(__init_rwsem); static inline void __down_read(struct rw_semaphore *sem) { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 825277e1e742..a8d0a58deebc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early) return; err_free_descs: - memblock_free(__pa(new_descs), new_descs_size); + memblock_free_ptr(new_descs, new_descs_size); err_free_log_buf: - memblock_free(__pa(new_log_buf), new_log_buf_len); + memblock_free_ptr(new_log_buf, new_log_buf_len); } static bool __read_mostly ignore_loglevel; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c4462c454ab9..1bba4128a3e6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8836,7 +8836,6 @@ static void balance_push(struct rq *rq) struct task_struct *push_task = rq->curr; lockdep_assert_rq_held(rq); - SCHED_WARN_ON(rq->cpu != smp_processor_id()); /* * Ensure the thing is persistent until balance_push_set(.on = false); @@ -8844,9 +8843,10 @@ static void balance_push(struct rq *rq) rq->balance_callback = &balance_push_callback; /* - * Only active while going offline. + * Only active while going offline and when invoked on the outgoing + * CPU. */ - if (!cpu_dying(rq->cpu)) + if (!cpu_dying(rq->cpu) || rq != this_rq()) return; /* diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 912b47aa99d8..d17b0a5ce6ac 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -379,10 +379,10 @@ void play_idle_precise(u64 duration_ns, u64 latency_ns) cpuidle_use_deepest_state(latency_ns); it.done = 0; - hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); it.timer.function = idle_inject_timer_fn; hrtimer_start(&it.timer, ns_to_ktime(duration_ns), - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_REL_PINNED_HARD); while (!READ_ONCE(it.done)) do_idle(); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 388e65d05978..8d252f63cd78 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -219,13 +219,12 @@ static int __init trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp, char *end, const char *key) { - struct xbc_node *knode, *anode; + struct xbc_node *anode; const char *p; char sep; - knode = xbc_node_find_child(hnode, key); - if (knode) { - anode = xbc_node_get_child(knode); + p = xbc_node_find_value(hnode, key, &anode); + if (p) { if (!anode) { pr_err("hist.%s requires value(s).\n", key); return -EINVAL; @@ -263,9 +262,9 @@ trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp, append_printf(bufp, end, ":%s(%s)", handler, p); /* Compose 'action' parameter */ - knode = xbc_node_find_child(hnode, "trace"); + knode = xbc_node_find_subkey(hnode, "trace"); if (!knode) - knode = xbc_node_find_child(hnode, "save"); + knode = xbc_node_find_subkey(hnode, "save"); if (knode) { anode = xbc_node_get_child(knode); @@ -284,7 +283,7 @@ trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp, sep = ','; } append_printf(bufp, end, ")"); - } else if (xbc_node_find_child(hnode, "snapshot")) { + } else if (xbc_node_find_subkey(hnode, "snapshot")) { append_printf(bufp, end, ".snapshot()"); } else { pr_err("hist.%s requires an action.\n", @@ -315,7 +314,7 @@ trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp, break; } - if (xbc_node_find_child(hnode, param)) + if (xbc_node_find_subkey(hnode, param)) ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param); return ret; @@ -375,7 +374,7 @@ trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size) if (p) append_printf(&buf, end, ":name=%s", p); - node = xbc_node_find_child(hnode, "var"); + node = xbc_node_find_subkey(hnode, "var"); if (node) { xbc_node_for_each_key_value(node, knode, p) { /* Expression must not include spaces. */ @@ -386,21 +385,21 @@ trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size) } /* Histogram control attributes (mutual exclusive) */ - if (xbc_node_find_child(hnode, "pause")) + if (xbc_node_find_value(hnode, "pause", NULL)) append_printf(&buf, end, ":pause"); - else if (xbc_node_find_child(hnode, "continue")) + else if (xbc_node_find_value(hnode, "continue", NULL)) append_printf(&buf, end, ":continue"); - else if (xbc_node_find_child(hnode, "clear")) + else if (xbc_node_find_value(hnode, "clear", NULL)) append_printf(&buf, end, ":clear"); /* Histogram handler and actions */ - node = xbc_node_find_child(hnode, "onmax"); + node = xbc_node_find_subkey(hnode, "onmax"); if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0) return -EINVAL; - node = xbc_node_find_child(hnode, "onchange"); + node = xbc_node_find_subkey(hnode, "onchange"); if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0) return -EINVAL; - node = xbc_node_find_child(hnode, "onmatch"); + node = xbc_node_find_subkey(hnode, "onmatch"); if (node && trace_boot_hist_add_handlers(node, &buf, end, "event") < 0) return -EINVAL; @@ -437,7 +436,7 @@ trace_boot_init_histograms(struct trace_event_file *file, } } - if (xbc_node_find_child(hnode, "keys")) { + if (xbc_node_find_subkey(hnode, "keys")) { if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) { tmp = kstrdup(buf, GFP_KERNEL); if (trigger_process_regex(file, buf) < 0) @@ -496,7 +495,7 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode, else if (trigger_process_regex(file, buf) < 0) pr_err("Failed to apply an action: %s\n", p); } - anode = xbc_node_find_child(enode, "hist"); + anode = xbc_node_find_subkey(enode, "hist"); if (anode) trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf)); } else if (xbc_node_find_value(enode, "actions", NULL)) @@ -518,7 +517,7 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) bool enable, enable_all = false; const char *data; - node = xbc_node_find_child(node, "event"); + node = xbc_node_find_subkey(node, "event"); if (!node) return; /* per-event key starts with "event.GROUP.EVENT" */ @@ -621,7 +620,7 @@ trace_boot_init_instances(struct xbc_node *node) struct trace_array *tr; const char *p; - node = xbc_node_find_child(node, "instance"); + node = xbc_node_find_subkey(node, "instance"); if (!node) return; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ed4a31e34098..d566f601780f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -295,7 +295,7 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_DWARF5 bool "Generate DWARF Version 5 debuginfo" - depends on GCC_VERSION >= 50000 || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) depends on !DEBUG_INFO_BTF help Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 927017431fb6..5ae248b29373 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -142,16 +142,16 @@ xbc_node_match_prefix(struct xbc_node *node, const char **prefix) } /** - * xbc_node_find_child() - Find a child node which matches given key + * xbc_node_find_subkey() - Find a subkey node which matches given key * @parent: An XBC node. * @key: A key string. * - * Search a node under @parent which matches @key. The @key can contain + * Search a key node under @parent which matches @key. The @key can contain * several words jointed with '.'. If @parent is NULL, this searches the * node from whole tree. Return NULL if no node is matched. */ struct xbc_node * __init -xbc_node_find_child(struct xbc_node *parent, const char *key) +xbc_node_find_subkey(struct xbc_node *parent, const char *key) { struct xbc_node *node; @@ -191,7 +191,7 @@ const char * __init xbc_node_find_value(struct xbc_node *parent, const char *key, struct xbc_node **vnode) { - struct xbc_node *node = xbc_node_find_child(parent, key); + struct xbc_node *node = xbc_node_find_subkey(parent, key); if (!node || !xbc_node_is_key(node)) return NULL; @@ -792,7 +792,7 @@ void __init xbc_destroy_all(void) xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX); + memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; brace_index = 0; } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f2d50d69a6c3..755c10c5138c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1972,3 +1972,39 @@ int import_single_range(int rw, void __user *buf, size_t len, return 0; } EXPORT_SYMBOL(import_single_range); + +/** + * iov_iter_restore() - Restore a &struct iov_iter to the same state as when + * iov_iter_save_state() was called. + * + * @i: &struct iov_iter to restore + * @state: state to restore from + * + * Used after iov_iter_save_state() to bring restore @i, if operations may + * have advanced it. + * + * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC + */ +void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) +{ + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && + !iov_iter_is_kvec(i)) + return; + i->iov_offset = state->iov_offset; + i->count = state->count; + /* + * For the *vec iters, nr_segs + iov is constant - if we increment + * the vec, then we also decrement the nr_segs count. Hence we don't + * need to track both of these, just one is enough and we can deduct + * the other from that. ITER_KVEC and ITER_IOVEC are the same struct + * size, so we can just increment the iov pointer as they are unionzed. + * ITER_BVEC _may_ be the same size on some archs, but on others it is + * not. Be safe and handle it separately. + */ + BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); + if (iov_iter_is_bvec(i)) + i->bvec -= state->nr_segs - i->nr_segs; + else + i->iov -= state->nr_segs - i->nr_segs; + i->nr_segs = state->nr_segs; +} diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index 2d3eb1cb73b8..ce39ce9f3526 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -134,4 +134,47 @@ void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen) return pci_iomap_wc_range(dev, bar, 0, maxlen); } EXPORT_SYMBOL_GPL(pci_iomap_wc); + +/* + * pci_iounmap() somewhat illogically comes from lib/iomap.c for the + * CONFIG_GENERIC_IOMAP case, because that's the code that knows about + * the different IOMAP ranges. + * + * But if the architecture does not use the generic iomap code, and if + * it has _not_ defined it's own private pci_iounmap function, we define + * it here. + * + * NOTE! This default implementation assumes that if the architecture + * support ioport mapping (HAS_IOPORT_MAP), the ioport mapping will + * be fixed to the range [ PCI_IOBASE, PCI_IOBASE+IO_SPACE_LIMIT [, + * and does not need unmapping with 'ioport_unmap()'. + * + * If you have different rules for your architecture, you need to + * implement your own pci_iounmap() that knows the rules for where + * and how IO vs MEM get mapped. + * + * This code is odd, and the ARCH_HAS/ARCH_WANTS #define logic comes + * from legacy <asm-generic/io.h> header file behavior. In particular, + * it would seem to make sense to do the iounmap(p) for the non-IO-space + * case here regardless, but that's not what the old header file code + * did. Probably incorrectly, but this is meant to be bug-for-bug + * compatible. + */ +#if defined(ARCH_WANTS_GENERIC_PCI_IOUNMAP) + +void pci_iounmap(struct pci_dev *dev, void __iomem *p) +{ +#ifdef ARCH_HAS_GENERIC_IOPORT_MAP + uintptr_t start = (uintptr_t) PCI_IOBASE; + uintptr_t addr = (uintptr_t) p; + + if (addr >= start && addr < start + IO_SPACE_LIMIT) + return; + iounmap(p); +#endif +} +EXPORT_SYMBOL(pci_iounmap); + +#endif /* ARCH_WANTS_GENERIC_PCI_IOUNMAP */ + #endif /* CONFIG_PCI */ @@ -651,10 +651,8 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) * from &migrate_nodes. This will verify that future list.h changes * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it. */ -#if defined(GCC_VERSION) && GCC_VERSION >= 40903 BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes); BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1); -#endif if (stable_node->head == &migrate_nodes) list_del(&stable_node->list); diff --git a/mm/memblock.c b/mm/memblock.c index 0ab5a749bfa6..184dcd2e5d99 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -472,7 +472,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, kfree(old_array); else if (old_array != memblock_memory_init_regions && old_array != memblock_reserved_init_regions) - memblock_free(__pa(old_array), old_alloc_size); + memblock_free_ptr(old_array, old_alloc_size); /* * Reserve the new array if that comes from the memblock. Otherwise, we @@ -796,6 +796,20 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) } /** + * memblock_free_ptr - free boot memory allocation + * @ptr: starting address of the boot memory allocation + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_alloc_xx() API. + * The freeing memory will not be released to the buddy allocator. + */ +void __init_memblock memblock_free_ptr(void *ptr, size_t size) +{ + if (ptr) + memblock_free(__pa(ptr), size); +} + +/** * memblock_free - free boot memory block * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 37b67194c0df..414dc5671c45 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -53,20 +53,6 @@ struct chnl_net { enum caif_states state; }; -static void robust_list_del(struct list_head *delete_node) -{ - struct list_head *list_node; - struct list_head *n; - ASSERT_RTNL(); - list_for_each_safe(list_node, n, &chnl_net_list) { - if (list_node == delete_node) { - list_del(list_node); - return; - } - } - WARN_ON(1); -} - static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; @@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev) ASSERT_RTNL(); priv = netdev_priv(dev); strncpy(priv->name, dev->name, sizeof(priv->name)); + INIT_LIST_HEAD(&priv->list_field); return 0; } @@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev) struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); - robust_list_del(&priv->list_field); + list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { @@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void) rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - list_del(list_node); + list_del_init(list_node); delete_device(dev); } rtnl_unlock(); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index b49c57d35a88..1a6a86693b74 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + if (sock) sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); - spin_unlock(&cgroup_sk_update_lock); - } if (--ctx->batch == 0) { ctx->batch = UPDATE_CLASSID_BATCH; return n + 1; @@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, struct css_task_iter it; struct task_struct *p; - cgroup_sk_alloc_disable(); - cs->classid = (u32)value; css_task_iter_start(css, 0, &it); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 99a431c56f23..8456dfbe2eb4 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; - cgroup_sk_alloc_disable(); - rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of, static int update_netprio(const void *v, struct file *file, unsigned n) { struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + + if (sock) sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); - spin_unlock(&cgroup_sk_update_lock); - } return 0; } @@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset) struct task_struct *p; struct cgroup_subsys_state *css; - cgroup_sk_alloc_disable(); - cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c5c74a34d139..91e7a2202697 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; + newdp->dccps_hc_rx_ccid = NULL; + newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1dc45e40f961..41f36ad8b0ec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } +void dsa_flush_workqueue(void) +{ + flush_workqueue(dsa_owq); +} + int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1b2b25d7bd02..eef13cd20f19 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -897,6 +897,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->setup = false; } +/* First tear down the non-shared, then the shared ports. This ensures that + * all work items scheduled by our switchdev handlers for user ports have + * completed before we destroy the refcounting kept on the shared ports. + */ +static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) + dsa_port_teardown(dp); + + dsa_flush_workqueue(); + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + dsa_port_teardown(dp); +} + +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + dsa_switch_teardown(dp->ds); +} + static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -923,26 +950,13 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) return 0; teardown: - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); + dsa_tree_teardown_ports(dst); - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); + dsa_tree_teardown_switches(dst); return err; } -static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); - - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); -} - static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -1052,6 +1066,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); dsa_tree_teardown_cpu_ports(dst); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 33ab7d7af9eb..a5c9bc7b66c6 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops); const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); bool dsa_schedule_work(struct work_struct *work); +void dsa_flush_workqueue(void); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 662ff531d4e2..a2bf2d8ac65b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1854,13 +1854,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); - if (ret) { - netdev_err(slave_dev, - "failed to connect to port %d: %d\n", - dp->index, ret); - phylink_destroy(dp->pl); - return ret; - } + } + if (ret) { + netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + phylink_destroy(dp->pl); } return ret; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f7bd7ae7d7a..141e85e6422b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1346,7 +1346,7 @@ static u8 tcp_sacktag_one(struct sock *sk, if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) - tp->undo_retrans--; + tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount); if ((sacked & TCPCB_SACKED_ACKED) && before(start_seq, state->reord)) state->reord = start_seq; diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 0d122edc368d..b91003538d87 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void) { int err; - udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); + udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0); if (!udp_tunnel_nic_workqueue) return -ENOMEM; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bec5b22f80d..0371d2c14145 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, int err = -ENOMEM; int allow_create = 1; int replace_required = 0; - int sernum = fib6_new_sernum(info->nl_net); if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!err) { if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); - __fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); fib6_start_gc(info->nl_net, rt); } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 53486b162f01..93271a2632b8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } if (tunnel->version == L2TP_HDR_VER_3 && - l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { + l2tp_session_dec_refcount(session); goto invalid; + } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/mctp/route.c b/net/mctp/route.c index 5265525011ad..5ca186d53cb0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1083,8 +1083,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net) { struct mctp_route *rt; + rcu_read_lock(); list_for_each_entry_rcu(rt, &net->mctp.routes, list) mctp_route_release(rt); + rcu_read_unlock(); } static struct pernet_operations mctp_net_ops = { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 543365f58e97..2a2bc64f75cf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -46,6 +46,8 @@ * Copyright (C) 2011, <lokec@ccs.neu.edu> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/ethtool.h> #include <linux/types.h> #include <linux/mm.h> diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a0a27d87f631..ad570c2450be 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb47b9de2380..92345c9bb60c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3073,7 +3073,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer(sk); if (other && unix_peer(other) != sk && - unix_recvq_full(other) && + unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3e02cc3b24f8..e2c0cfb334d2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2014,7 +2014,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, { const struct vsock_transport *transport; struct vsock_sock *vsk; - ssize_t record_len; + ssize_t msg_len; long timeout; int err = 0; DEFINE_WAIT(wait); @@ -2028,9 +2028,9 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, if (err <= 0) goto out; - record_len = transport->seqpacket_dequeue(vsk, msg, flags); + msg_len = transport->seqpacket_dequeue(vsk, msg, flags); - if (record_len < 0) { + if (msg_len < 0) { err = -ENOMEM; goto out; } @@ -2044,14 +2044,14 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, * packet. */ if (flags & MSG_TRUNC) - err = record_len; + err = msg_len; else err = len - msg_data_left(msg); /* Always set MSG_TRUNC if real length of packet is * bigger than user's buffer. */ - if (record_len > len) + if (msg_len > len) msg->msg_flags |= MSG_TRUNC; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 081e7ae93cb1..59ee1be5a6dd 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -76,8 +76,12 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, goto out; if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) - pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } } trace_virtio_transport_alloc_pkt(src_cid, src_port, @@ -457,9 +461,12 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, dequeued_len += pkt_len; } - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { msg_ready = true; vvs->msg_count--; + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) + msg->msg_flags |= MSG_EOR; } virtio_transport_dec_rx_pkt(vvs, pkt); @@ -1029,7 +1036,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, goto out; } - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count++; /* Try to copy small packets into the buffer of last packet queued, @@ -1044,12 +1051,12 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, /* If there is space in the last packet queued, we copy the * new packet in its buffer. We avoid this if the last packet - * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is - * delimiter of SEQPACKET record, so 'pkt' is the first packet - * of a new record. + * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is + * delimiter of SEQPACKET message, so 'pkt' is the first packet + * of a new message. */ if ((pkt->len <= last_pkt->buf_len - last_pkt->len) && - !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) { + !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) { memcpy(last_pkt->buf + last_pkt->len, pkt->buf, pkt->len); last_pkt->len += pkt->len; diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang index 4cce8fd0779c..51fc23e2e9e5 100644 --- a/scripts/Makefile.clang +++ b/scripts/Makefile.clang @@ -29,7 +29,12 @@ CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) else CLANG_FLAGS += -fintegrated-as endif +# By default, clang only warns when it encounters an unknown warning flag or +# certain optimization flags it knows it has not implemented. +# Make it behave more like gcc by erroring when these flags are encountered +# so they can be implemented or wrapped in cc-option. CLANG_FLAGS += -Werror=unknown-warning-option +CLANG_FLAGS += -Werror=ignored-optimization-argument KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) export CLANG_FLAGS diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index eef56d629799..48585c4d04ad 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,7 @@ # Stage 2 is handled by this file and does the following # 1) Find all modules listed in modules.order # 2) modpost is then used to -# 3) create one <module>.mod.c file pr. module +# 3) create one <module>.mod.c file per module # 4) create one Module.symvers file with CRC for all exported symbols # Step 3 is used to place certain information in the module's ELF diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py index b9b0f15e5880..217d21abc86e 100755 --- a/scripts/checkkconfigsymbols.py +++ b/scripts/checkkconfigsymbols.py @@ -34,7 +34,6 @@ REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL) REGEX_KCONFIG_DEF = re.compile(DEF) REGEX_KCONFIG_EXPR = re.compile(EXPR) REGEX_KCONFIG_STMT = re.compile(STMT) -REGEX_KCONFIG_HELP = re.compile(r"^\s+help\s*$") REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$") REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+") REGEX_QUOTES = re.compile("(\"(.*?)\")") @@ -102,6 +101,9 @@ def parse_options(): "continue.") if args.commit: + if args.commit.startswith('HEAD'): + sys.exit("The --commit option can't use the HEAD ref") + args.find = False if args.ignore: @@ -432,7 +434,6 @@ def parse_kconfig_file(kfile): lines = [] defined = [] references = [] - skip = False if not os.path.exists(kfile): return defined, references @@ -448,12 +449,6 @@ def parse_kconfig_file(kfile): if REGEX_KCONFIG_DEF.match(line): symbol_def = REGEX_KCONFIG_DEF.findall(line) defined.append(symbol_def[0]) - skip = False - elif REGEX_KCONFIG_HELP.match(line): - skip = True - elif skip: - # ignore content of help messages - pass elif REGEX_KCONFIG_STMT.match(line): line = REGEX_QUOTES.sub("", line) symbols = get_symbols_in_line(line) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 0033eedce003..1d1bde1fd45e 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -13,6 +13,7 @@ import logging import os import re import subprocess +import sys _DEFAULT_OUTPUT = 'compile_commands.json' _DEFAULT_LOG_LEVEL = 'WARNING' diff --git a/scripts/coccinelle/api/kvmalloc.cocci b/scripts/coccinelle/api/kvmalloc.cocci index c30dab718a49..5ddcb76b76b0 100644 --- a/scripts/coccinelle/api/kvmalloc.cocci +++ b/scripts/coccinelle/api/kvmalloc.cocci @@ -79,7 +79,7 @@ position p : script:python() { relevant(p) }; } else { ... when != krealloc(E, ...) when any -* \(kfree\|kzfree\)(E) +* \(kfree\|kfree_sensitive\)(E) ... } diff --git a/scripts/coccinelle/iterators/use_after_iter.cocci b/scripts/coccinelle/iterators/use_after_iter.cocci index 9be48b520879..676edd562eef 100644 --- a/scripts/coccinelle/iterators/use_after_iter.cocci +++ b/scripts/coccinelle/iterators/use_after_iter.cocci @@ -123,6 +123,8 @@ hlist_for_each_entry_safe(c,...) S | list_remove_head(x,c,...) | +list_entry_is_head(c,...) +| sizeof(<+...c...+>) | &c->member diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 319f92104f56..4edc708baa63 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -17,13 +17,7 @@ binutils) echo 2.23.0 ;; gcc) - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 - # https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk - if [ "$SRCARCH" = arm64 ]; then - echo 5.1.0 - else - echo 4.9.0 - fi + echo 5.1.0 ;; icc) # temporary diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 0ef3abfc4a51..f355869c65cd 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -349,6 +349,7 @@ static int do_file(char const *const fname, void *addr) case EM_ARM: case EM_MICROBLAZE: case EM_MIPS: + case EM_RISCV: case EM_XTENSA: break; default: diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h new file mode 100644 index 000000000000..174e7d83fcbd --- /dev/null +++ b/tools/arch/x86/include/asm/amd-ibs.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include "msr-index.h" + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + reserved:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + reserved0:1, /* 16: reserved */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved1:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + reserved2:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 2 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src:3, /* 0-2: data source */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + reserved1:57; /* 5-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index a6c327f8ad9e..2ef1f6513c68 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h index 7862f217d85d..f2e506f7d57f 100644 --- a/tools/bootconfig/include/linux/memblock.h +++ b/tools/bootconfig/include/linux/memblock.h @@ -4,9 +4,8 @@ #include <stdlib.h> -#define __pa(addr) (addr) #define SMP_CACHE_BYTES 0 #define memblock_alloc(size, align) malloc(size) -#define memblock_free(paddr, size) free(paddr) +#define memblock_free_ptr(paddr, size) free(paddr) #endif diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 95c072b70d0e..8816f06fc6c7 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -16,9 +16,9 @@ # define __fallthrough __attribute__ ((fallthrough)) #endif -#if GCC_VERSION >= 40300 +#if __has_attribute(__error__) # define __compiletime_error(message) __attribute__((error(message))) -#endif /* GCC_VERSION >= 40300 */ +#endif /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) @@ -38,7 +38,3 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) - -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index 8712ff70995f..dcb0c1bf6866 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -5,12 +5,9 @@ #include <linux/compiler.h> /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -36,8 +33,6 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) - -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -73,135 +68,6 @@ __builtin_mul_overflow(__a, __b, __d); \ }) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. - */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d)) - -#define check_sub_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d)) - -#define check_mul_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** * array_size() - Calculate size of 2-dimensional array. * diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a9d6fcd95f42..1c5fb86d455a 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 236 -__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_migrate_pages 238 -__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_move_pages 239 -__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 @@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_memfd_secret 447 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) #undef __NR_syscalls -#define __NR_syscalls 448 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index d043752a74cf..3b810b53ba8b 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -635,8 +635,8 @@ struct drm_gem_open { /** * DRM_CAP_VBLANK_HIGH_CRTC * - * If set to 1, the kernel supports specifying a CRTC index in the high bits of - * &drm_wait_vblank_request.type. + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. * * Starting kernel version 2.6.39, this capability is always set to 1. */ @@ -1050,6 +1050,16 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index c2c7759b7d2e..bde5860b3686 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 @@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. + * + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. */ __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; @@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE - * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. - */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. + * + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. - */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. + * + * The supported @caching values: * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; @@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 @@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ @@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1883,6 +2049,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; @@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ @@ -2377,6 +2643,76 @@ struct drm_i915_query_topology_info { }; /** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + +/** * struct drm_i915_engine_info * * Describes one engine and it's capabilities as known to the driver. diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 4c32e97dcdf0..bdf7b404b3e7 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -184,6 +184,7 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index d1b327036ae4..14168225cecd 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ @@ -211,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index d9e4aabcb31a..a067410ebea5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1965,7 +1965,9 @@ struct kvm_stats_header { #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) -#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK +#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST #define KVM_STATS_UNIT_SHIFT 4 #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) @@ -1988,8 +1990,9 @@ struct kvm_stats_header { * @size: The number of data items for this stats. * Every data item is of type __u64. * @offset: The offset of the stats to the start of stat structure in - * struture kvm or kvm_vcpu. - * @unused: Unused field for future usage. Always 0 for now. + * structure kvm or kvm_vcpu. + * @bucket_size: A parameter value used for histogram stats. It is only used + * for linear histogram stats, specifying the size of the bucket; * @name: The name string for the stats. Its size is indicated by the * &kvm_stats_header->name_size. */ @@ -1998,7 +2001,7 @@ struct kvm_stats_desc { __s16 exponent; __u16 size; __u32 offset; - __u32 unused; + __u32 bucket_size; char name[]; }; diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index dd7a166fdf9c..4d93967f8aea 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -73,7 +73,8 @@ #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#define MOVE_MOUNT__MASK 0x00000077 +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT__MASK 0x00000177 /* * fsopen() flags. diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 967d9c55323d..43bd7f713c39 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -213,6 +213,7 @@ struct prctl_mm_map { /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 # define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) @@ -234,14 +235,15 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index d17c061950df..1d84ec9db93b 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ +#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index d8886720e83d..8441e3e1aaac 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,7 +43,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) #define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -54,7 +54,10 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd) + *fd = -1; } } } @@ -80,7 +83,7 @@ sys_perf_event_open(struct perf_event_attr *attr, static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; - int fd; + int *fd; if (evsel == leader) { *group_fd = -1; @@ -95,10 +98,10 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou return -ENOTCONN; fd = FD(leader, cpu, thread); - if (fd == -1) + if (fd == NULL || *fd == -1) return -EBADF; - *group_fd = fd; + *group_fd = *fd; return 0; } @@ -138,7 +141,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { - int fd, group_fd; + int fd, group_fd, *evsel_fd; + + evsel_fd = FD(evsel, cpu, thread); + if (evsel_fd == NULL) + return -EINVAL; err = get_group_fd(evsel, cpu, thread, &group_fd); if (err < 0) @@ -151,7 +158,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, if (fd < 0) return -errno; - FD(evsel, cpu, thread) = fd; + *evsel_fd = fd; } } @@ -163,9 +170,12 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd && *fd >= 0) { + close(*fd); + *fd = -1; + } } } @@ -209,13 +219,12 @@ void perf_evsel__munmap(struct perf_evsel *evsel) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); - if (fd < 0) + if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(map); + perf_mmap__munmap(MMAP(evsel, cpu, thread)); } } @@ -239,15 +248,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); + struct perf_mmap *map; - if (fd < 0) + if (fd == NULL || *fd < 0) continue; + map = MMAP(evsel, cpu, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, fd, cpu); + ret = perf_mmap__mmap(map, &mp, *fd, cpu); if (ret) { perf_evsel__munmap(evsel); return ret; @@ -260,7 +270,9 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) { - if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL) + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) return NULL; return MMAP(evsel, cpu, thread)->base; @@ -295,17 +307,18 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu, thread); memset(count, 0, sizeof(*count)); - if (FD(evsel, cpu, thread) < 0) + if (fd == NULL || *fd < 0) return -EINVAL; if (MMAP(evsel, cpu, thread) && !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) return 0; - if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) + if (readn(*fd, count->values, size) <= 0) return -errno; return 0; @@ -318,8 +331,13 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread), - err = ioctl(fd, ioc, arg); + int err; + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0) + return -1; + + err = ioctl(*fd, ioc, arg); if (err) return err; diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index e555e9729758..8e0163b7ef01 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,3 +39,4 @@ pmu-events/jevents feature/ fixdep libtraceevent-dynamic-list +Documentation/doc.dep diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b66cf128cbc7..446180401e26 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -827,33 +827,36 @@ else endif endif -ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd -lopcodes -else - # we are on a system that requires -liberty and (maybe) -lz - # to link against -lbfd; test each case individually here - - # call all detections now so we get correct - # status in VF output - $(call feature_check,libbfd-liberty) - $(call feature_check,libbfd-liberty-z) - ifeq ($(feature-libbfd-liberty), 1) - EXTLIBS += -lbfd -lopcodes -liberty - FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl +ifndef NO_LIBBFD + ifeq ($(feature-libbfd), 1) + EXTLIBS += -lbfd -lopcodes else - ifeq ($(feature-libbfd-liberty-z), 1) - EXTLIBS += -lbfd -lopcodes -liberty -lz - FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl + # we are on a system that requires -liberty and (maybe) -lz + # to link against -lbfd; test each case individually here + + # call all detections now so we get correct + # status in VF output + $(call feature_check,libbfd-liberty) + $(call feature_check,libbfd-liberty-z) + + ifeq ($(feature-libbfd-liberty), 1) + EXTLIBS += -lbfd -lopcodes -liberty + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl + else + ifeq ($(feature-libbfd-liberty-z), 1) + EXTLIBS += -lbfd -lopcodes -liberty -lz + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl + endif endif + $(call feature_check,disassembler-four-args) endif - $(call feature_check,disassembler-four-args) -endif -ifeq ($(feature-libbfd-buildid), 1) - CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT -else - msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); + ifeq ($(feature-libbfd-buildid), 1) + CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT + else + msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); + endif endif ifdef NO_DEMANGLE diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index ac653d08b1ea..1ca7bc337932 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -361,3 +361,5 @@ 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 6f3953f2a0d5..7bef917cc84e 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -330,10 +330,10 @@ 256 64 sys_debug_setcontext sys_ni_syscall 256 spu sys_debug_setcontext sys_ni_syscall # 257 reserved for vserver -258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages -259 nospu mbind sys_mbind compat_sys_mbind -260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +258 nospu migrate_pages sys_migrate_pages +259 nospu mbind sys_mbind +260 nospu get_mempolicy sys_get_mempolicy +261 nospu set_mempolicy sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink 264 32 mq_timedsend sys_mq_timedsend_time32 @@ -381,7 +381,7 @@ 298 common faccessat sys_faccessat 299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -301 common move_pages sys_move_pages compat_sys_move_pages +301 common move_pages sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 304 32 utimensat sys_utimensat_time32 @@ -526,3 +526,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 8d619ec86dcc..df5261e5cfe1 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -122,7 +122,7 @@ 131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 common fchdir sys_fchdir sys_fchdir -134 common bdflush - - +134 common bdflush sys_ni_syscall sys_ni_syscall 135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - @@ -274,9 +274,9 @@ 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind compat_sys_mbind -269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 common mbind sys_mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink 273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 @@ -293,7 +293,7 @@ 284 common inotify_init sys_inotify_init sys_inotify_init 285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 common migrate_pages sys_migrate_pages sys_migrate_pages 288 common openat sys_openat compat_sys_openat 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat @@ -317,7 +317,7 @@ 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages compat_sys_move_pages +310 common move_pages sys_move_pages sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 313 common utimes sys_utimes sys_utimes_time32 @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f6b57799c1ea..18b5500ea8bf 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -369,6 +369,7 @@ 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are numbered differently @@ -397,7 +398,7 @@ 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list 532 x32 vmsplice sys_vmsplice -533 x32 move_pages compat_sys_move_pages +533 x32 move_pages sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0e824f7d8b19..6211d0b84b7a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -368,16 +368,6 @@ static inline int output_type(unsigned int type) return OUTPUT_TYPE_OTHER; } -static inline unsigned int attr_type(unsigned int type) -{ - switch (type) { - case OUTPUT_TYPE_SYNTH: - return PERF_TYPE_SYNTH; - default: - return type; - } -} - static bool output_set_by_user(void) { int j; @@ -556,6 +546,18 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } +static struct evsel *find_first_output_type(struct evlist *evlist, + unsigned int type) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (output_type(evsel->core.attr.type) == (int)type) + return evsel; + } + return NULL; +} + /* * verify all user requested events exist and the samples * have the expected data @@ -567,7 +569,7 @@ static int perf_session__check_output_opt(struct perf_session *session) struct evsel *evsel; for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { - evsel = perf_session__find_first_evtype(session, attr_type(j)); + evsel = find_first_output_type(session->evlist, j); /* * even if fields is set to 0 (ie., show nothing) event must diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c783558332b8..f1e46277e822 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -144,6 +144,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' +check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report index 356b9656393d..21a356bd27f6 100755 --- a/tools/perf/scripts/python/bin/stackcollapse-report +++ b/tools/perf/scripts/python/bin/stackcollapse-report @@ -1,3 +1,3 @@ #!/bin/sh # description: produce callgraphs in short form for scripting use -perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@" +perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@" diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index dbf5f5215abe..fa03ff0dc083 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -192,7 +192,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect * evlist->core.nr_entries) { - pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count); goto out_delete_evlist; } diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 0d8e3dcb7f88..041d6032a348 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -223,8 +223,11 @@ struct ucred { * reuses AF_INET address family */ #define AF_XDP 44 /* XDP sockets */ +#define AF_MCTP 45 /* Management component + * transport protocol + */ -#define AF_MAX 45 /* For now.. */ +#define AF_MAX 46 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -274,6 +277,7 @@ struct ucred { #define PF_QIPCRTR AF_QIPCRTR #define PF_SMC AF_SMC #define PF_XDP AF_XDP +#define PF_MCTP AF_MCTP #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ @@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags, unsigned long nofile); +extern struct file *do_accept(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh index 55e59241daa4..4b1d9acc0bd0 100755 --- a/tools/perf/trace/beauty/move_mount_flags.sh +++ b/tools/perf/trace/beauty/move_mount_flags.sh @@ -10,7 +10,7 @@ fi linux_mount=${linux_header_dir}/mount.h printf "static const char *move_mount_flags[] = {\n" -regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' egrep $regex ${linux_mount} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 781afe42e90e..fa5bd5c20e96 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, } void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down) + unsigned int row, int diff, bool arrow_down) { - unsigned int end_row; + int end_row; - if (row >= browser->top_idx) - end_row = row - browser->top_idx; - else + if (diff <= 0) return; SLsmg_set_char_set(1); if (arrow_down) { + if (row + diff <= browser->top_idx) + return; + + end_row = row + diff - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); - SLsmg_write_char(SLSMG_ULCORN_CHAR); - ui_browser__gotorc(browser, end_row, column); - SLsmg_draw_hline(2); - ui_browser__gotorc(browser, end_row + 1, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); + + while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_draw_vline(1); + } + + end_row = (int)(row - browser->top_idx); + if (end_row >= 0) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + } } else { + if (row < browser->top_idx) + return; + + end_row = row - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); ui_browser__gotorc(browser, end_row, column); diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 3678eb88f119..510ce4554050 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down); + unsigned int row, int diff, bool arrow_down); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *browser, const char *title, diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ef4da4295bf7..e81c2493efdf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ab->selection = al; } -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; + int diff = 1; + + while (pos && pos->al.offset == -1) { + pos = list_prev_entry(pos, al.node); + if (!ab->opts->hide_src_code) + diff++; + } if (!pos) - return false; + return 0; if (ins__is_lock(&pos->ins)) name = pos->ops.locked.ins.name; @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) name = pos->ins.name; if (!name || !cursor->ins.name) - return false; + return 0; - return ins__is_fused(ab->arch, name, cursor->ins.name); + if (ins__is_fused(ab->arch, name, cursor->ins.name)) + return diff; + return 0; } static void annotate_browser__draw_current_jump(struct ui_browser *browser) @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotation *notes = symbol__annotation(sym); u8 pcnt_width = annotation__pcnt_width(notes); int width; + int diff = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) pcnt_width + 2 + notes->widths.addr + width, from, to); - if (is_fused(ab, cursor)) { + diff = is_fused(ab, cursor); + if (diff > 0) { ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, - from - 1, - to > from); + from - diff, diff, to > from); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2d4fa1304178..f2914d5bed6e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -59,6 +59,7 @@ perf-y += pstack.o perf-y += session.o perf-y += sample-raw.o perf-y += s390-sample-raw.o +perf-y += amd-sample-raw.o perf-$(CONFIG_TRACE) += syscalltbl.o perf-y += ordered-events.o perf-y += namespaces.o diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c new file mode 100644 index 000000000000..d19d765195c5 --- /dev/null +++ b/tools/perf/util/amd-sample-raw.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD specific. Provide textual annotation for IBS raw sample data. + */ + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +#include <linux/string.h> +#include "../../arch/x86/include/asm/amd-ibs.h" + +#include "debug.h" +#include "session.h" +#include "evlist.h" +#include "sample-raw.h" +#include "pmu-events/pmu-events.h" + +static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; + +static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) +{ + const char * const ic_miss_strs[] = { + " IcMiss 0", + " IcMiss 1", + }; + const char * const l1tlb_pgsz_strs[] = { + " L1TlbPgSz 4KB", + " L1TlbPgSz 2MB", + " L1TlbPgSz 1GB", + " L1TlbPgSz RESERVED" + }; + const char * const l1tlb_pgsz_strs_erratum1347[] = { + " L1TlbPgSz 4KB", + " L1TlbPgSz 16KB", + " L1TlbPgSz 2MB", + " L1TlbPgSz 1GB" + }; + const char *ic_miss_str = NULL; + const char *l1tlb_pgsz_str = NULL; + + if (cpu_family == 0x19 && cpu_model < 0x10) { + /* + * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] + * Erratum #1347 workaround is to use table provided in erratum + */ + if (reg.phy_addr_valid) + l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; + } else { + if (reg.phy_addr_valid) + l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; + ic_miss_str = ic_miss_strs[reg.ic_miss]; + } + + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", + reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, + reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", + reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); +} + +static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) +{ + printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); +} + +static void pr_ibs_op_ctl(union ibs_op_ctl reg) +{ + printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, + reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); +} + +static void pr_ibs_op_data(union ibs_op_data reg) +{ + printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " + " RipInvalid %d BrnFuse %d Microcode %d\n", + reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, + reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", + reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", + reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", + reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); +} + +static void pr_ibs_op_data2(union ibs_op_data2 reg) +{ + static const char * const data_src_str[] = { + "", + " DataSrc 1=(reserved)", + " DataSrc 2=Local node cache", + " DataSrc 3=DRAM", + " DataSrc 4=Remote node cache", + " DataSrc 5=(reserved)", + " DataSrc 6=(reserved)", + " DataSrc 7=Other" + }; + + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, + reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " + : "CacheHitSt 0=M-state ") : "", + reg.rmt_node, data_src_str[reg.data_src]); +} + +static void pr_ibs_op_data3(union ibs_op_data3 reg) +{ + char l2_miss_str[sizeof(" L2Miss _")] = ""; + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; + + /* + * Erratum #1293 + * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set + */ + if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { + snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); + snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), + " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); + } + + if (reg.op_mem_width) + snprintf(op_mem_width_str, sizeof(op_mem_width_str), + " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); + + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " + "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " + "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", + reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, + reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, + reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, + reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, + op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); +} + +/* + * IBS Op/Execution MSRs always saved, in order, are: + * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, + * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP + */ +static void amd_dump_ibs_op(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; + __u64 *rip = (__u64 *)op_ctl + 1; + union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); + union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); + + pr_ibs_op_ctl(*op_ctl); + if (!op_data->op_rip_invalid) + printf("IbsOpRip:\t%016llx\n", *rip); + pr_ibs_op_data(*op_data); + /* + * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set + */ + if (!(cpu_family == 0x19 && cpu_model < 0x10 && + (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) + pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); + pr_ibs_op_data3(*op_data3); + if (op_data3->dc_lin_addr_valid) + printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); + if (op_data3->dc_phy_addr_valid) + printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); + if (op_data->op_brn_ret && *(rip + 6)) + printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); +} + +/* + * IBS Fetch MSRs always saved, in order, are: + * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL + */ +static void amd_dump_ibs_fetch(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; + __u64 *addr = (__u64 *)fetch_ctl + 1; + union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; + + pr_ibs_fetch_ctl(*fetch_ctl); + printf("IbsFetchLinAd:\t%016llx\n", *addr++); + if (fetch_ctl->phy_addr_valid) + printf("IbsFetchPhysAd:\t%016llx\n", *addr); + pr_ic_ibs_extd_ctl(*extd_ctl); +} + +/* + * Test for enable and valid bits in captured control MSRs. + */ +static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; + + if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) + return true; + + return false; +} + +static bool is_valid_ibs_op_sample(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; + + if (op_ctl->op_en && op_ctl->op_val) + return true; + + return false; +} + +/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events + * and if the event was triggered by IBS, display its raw data with decoded text. + * The function is only invoked when the dump flag -D is set. + */ +void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) +{ + struct evsel *evsel; + + if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) + return; + + evsel = evlist__event2evsel(evlist, event); + if (!evsel) + return; + + if (evsel->core.attr.type == ibs_fetch_type) { + if (!is_valid_ibs_fetch_sample(sample)) { + pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); + return; + } + amd_dump_ibs_fetch(sample); + } else if (evsel->core.attr.type == ibs_op_type) { + if (!is_valid_ibs_op_sample(sample)) { + pr_debug("Invalid raw IBS Op MSR data encountered\n"); + return; + } + amd_dump_ibs_op(sample); + } +} + +static void parse_cpuid(struct perf_env *env) +{ + const char *cpuid; + int ret; + + cpuid = perf_env__cpuid(env); + /* + * cpuid = "AuthenticAMD,family,model,stepping" + */ + ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); + if (ret != 2) + pr_debug("problem parsing cpuid\n"); +} + +/* + * Find and assign the type number used for ibs_op or ibs_fetch samples. + * Device names can be large - we are only interested in the first 9 characters, + * to match "ibs_fetch". + */ +bool evlist__has_amd_ibs(struct evlist *evlist) +{ + struct perf_env *env = evlist->env; + int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); + const char *pmu_mapping = perf_env__pmu_mappings(env); + char name[sizeof("ibs_fetch")]; + u32 type; + + while (nr_pmu_mappings--) { + ret = sscanf(pmu_mapping, "%u:%9s", &type, name); + if (ret == 2) { + if (strstarts(name, "ibs_op")) + ibs_op_type = type; + else if (strstarts(name, "ibs_fetch")) + ibs_fetch_type = type; + } + pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; + } + + if (ibs_fetch_type || ibs_op_type) { + if (!cpu_family) + parse_cpuid(env); + return true; + } + + return false; +} diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 996d025b8ed8..1a7112a87736 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -21,6 +21,17 @@ #include "record.h" #include "util/synthetic-events.h" +struct btf * __weak btf__load_from_kernel_by_id(__u32 id) +{ + struct btf *btf; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + int err = btf__get_from_id(id, &btf); +#pragma GCC diagnostic pop + + return err ? ERR_PTR(err) : btf; +} + #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ee15db2be2f4..9ed9a5676d35 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1349,6 +1349,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid) bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) { + if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) { + /* + * For the backward compatibility, it allows a build-id has + * trailing zeros. + */ + return !memcmp(dso->bid.data, bid->data, bid->size) && + !memchr_inv(&dso->bid.data[bid->size], 0, + dso->bid.size - bid->size); + } + return dso->bid.size == bid->size && memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 8f7ff0035c41..cf773f0dec38 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -10,6 +10,7 @@ #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "strbuf.h" struct perf_env perf_env; @@ -306,6 +307,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_pmu_mappings(struct perf_env *env) +{ + struct perf_pmu *pmu = NULL; + u32 pmu_num = 0; + struct strbuf sb; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + } + if (!pmu_num) { + pr_debug("pmu mappings not available\n"); + return -ENOENT; + } + env->nr_pmu_mappings = pmu_num; + + if (strbuf_init(&sb, 128 * pmu_num) < 0) + return -ENOMEM; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0) + goto error; + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto error; + } + + env->pmu_mappings = strbuf_detach(&sb, NULL); + + return 0; + +error: + strbuf_release(&sb); + return -1; +} + int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; @@ -404,6 +444,44 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__cpuid(struct perf_env *env) +{ + int status; + + if (!env || !env->cpuid) { /* Assume local operation */ + status = perf_env__read_cpuid(env); + if (status) + return NULL; + } + + return env->cpuid; +} + +int perf_env__nr_pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->nr_pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return 0; + } + + return env->nr_pmu_mappings; +} + +const char *perf_env__pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return NULL; + } + + return env->pmu_mappings; +} int perf_env__numa_node(struct perf_env *env, int cpu) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1f5175820a05..1383876f72b3 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -149,11 +149,16 @@ int perf_env__kernel_is_64_bit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpuid(struct perf_env *env); +int perf_env__read_pmu_mappings(struct perf_env *env); +int perf_env__nr_pmu_mappings(struct perf_env *env); +const char *perf_env__pmu_mappings(struct perf_env *env); + int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 54d251327b5b..dbfeceb2546c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -333,11 +333,11 @@ error_free: goto out; } -static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +int copy_config_terms(struct list_head *dst, struct list_head *src) { struct evsel_config_term *pos, *tmp; - list_for_each_entry(pos, &src->config_terms, list) { + list_for_each_entry(pos, src, list) { tmp = malloc(sizeof(*tmp)); if (tmp == NULL) return -ENOMEM; @@ -350,11 +350,16 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) return -ENOMEM; } } - list_add_tail(&tmp->list, &dst->config_terms); + list_add_tail(&tmp->list, dst); } return 0; } +static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +{ + return copy_config_terms(&dst->config_terms, &src->config_terms); +} + /** * evsel__clone - create a new evsel copied from @orig * @orig: original evsel @@ -1385,11 +1390,11 @@ int evsel__disable(struct evsel *evsel) return err; } -static void evsel__free_config_terms(struct evsel *evsel) +void free_config_terms(struct list_head *config_terms) { struct evsel_config_term *term, *h; - list_for_each_entry_safe(term, h, &evsel->config_terms, list) { + list_for_each_entry_safe(term, h, config_terms, list) { list_del_init(&term->list); if (term->free_str) zfree(&term->val.str); @@ -1397,6 +1402,11 @@ static void evsel__free_config_terms(struct evsel *evsel) } } +static void evsel__free_config_terms(struct evsel *evsel) +{ + free_config_terms(&evsel->config_terms); +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1b3eeab5f188..1f7edfa8568a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -213,6 +213,9 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) struct evsel *evsel__clone(struct evsel *orig); struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); +int copy_config_terms(struct list_head *dst, struct list_head *src); +void free_config_terms(struct list_head *config_terms); + /* * Returns pointer with encoded error via <linux/err.h> interface. */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da19be7da284..44e40bad0e33 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2149,6 +2149,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; + al.srcline = NULL; if (!cpumode) { thread__find_cpumode_addr_location(thread, ip, &al); } else { diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 10160ab126f9..b234d95fb10a 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -76,12 +76,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - config_terms, pmu); + &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } @@ -115,11 +119,15 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, config_terms, pmu); + name, &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } @@ -165,11 +173,15 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, *hybrid = true; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, config_terms, pmu); + attr, name, &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e5eae23cfceb..51a2219df601 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -387,7 +387,7 @@ __add_event(struct list_head *list, int *idx, evsel->name = strdup(name); if (config_terms) - list_splice(config_terms, &evsel->config_terms); + list_splice_init(config_terms, &evsel->config_terms); if (list) list_add_tail(&evsel->core.node, list); @@ -535,9 +535,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, config_name ? : name, &config_terms, &hybrid, parse_state); if (hybrid) - return ret; + goto out_free_terms; - return add_event(list, idx, &attr, config_name ? : name, &config_terms); + ret = add_event(list, idx, &attr, config_name ? : name, &config_terms); +out_free_terms: + free_config_terms(&config_terms); + return ret; } static void tracepoint_error(struct parse_events_error *e, int err, @@ -1457,10 +1460,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms, &hybrid); if (hybrid) - return ret; + goto out_free_terms; - return add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), &config_terms); + ret = add_event(list, &parse_state->idx, &attr, + get_config_name(head_config), &config_terms); +out_free_terms: + free_config_terms(&config_terms); + return ret; } int parse_events_add_tool(struct parse_events_state *parse_state, @@ -1608,14 +1614,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { - struct evsel_config_term *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &config_terms, list) { - list_del_init(&pos->list); - if (pos->free_str) - zfree(&pos->val.str); - free(pos); - } + free_config_terms(&config_terms); return -EINVAL; } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 30481825515b..47b7531f51da 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -137,6 +137,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(cgroup, p_unsigned); PRINT_ATTRf(text_poke, p_unsigned); PRINT_ATTRf(build_id, p_unsigned); + PRINT_ATTRf(inherit_thread, p_unsigned); + PRINT_ATTRf(remove_on_exec, p_unsigned); + PRINT_ATTRf(sigtrap, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -150,7 +153,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); - PRINT_ATTRf(text_poke, p_unsigned); + PRINT_ATTRf(sig_data, p_unsigned); return ret; } diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index cde5cd3ce49b..f3f6bd9d290e 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -1,8 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <string.h> +#include <linux/string.h> #include "evlist.h" #include "env.h" +#include "header.h" #include "sample-raw.h" /* @@ -12,7 +14,13 @@ void evlist__init_trace_event_sample_raw(struct evlist *evlist) { const char *arch_pf = perf_env__arch(evlist->env); + const char *cpuid = perf_env__cpuid(evlist->env); if (arch_pf && !strcmp("s390", arch_pf)) evlist->trace_event_sample_raw = evlist__s390_sample_raw; + else if (arch_pf && !strcmp("x86", arch_pf) && + cpuid && strstarts(cpuid, "AuthenticAMD") && + evlist__has_amd_ibs(evlist)) { + evlist->trace_event_sample_raw = evlist__amd_sample_raw; + } } diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index 4be84a5510cf..ea01c5811503 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -6,6 +6,10 @@ struct evlist; union perf_event; struct perf_sample; -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample); +bool evlist__has_amd_ibs(struct evlist *evlist); +void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample); void evlist__init_trace_event_sample_raw(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 77fc46ca07c0..0fc9a5410739 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1581,10 +1581,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) goto out_close; - section = bfd_get_section_by_name(abfd, ".text"); - if (section) - dso->text_offset = section->vma - section->filepos; - symbols_size = bfd_get_symtab_upper_bound(abfd); if (symbols_size == 0) { bfd_close(abfd); @@ -1602,6 +1598,22 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (symbols_count < 0) goto out_free; + section = bfd_get_section_by_name(abfd, ".text"); + if (section) { + for (i = 0; i < symbols_count; ++i) { + if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || + !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) + break; + } + if (i < symbols_count) { + /* PE symbols can only have 4 bytes, so use .text high bits */ + dso->text_offset = section->vma - (u32)section->vma; + dso->text_offset += (u32)bfd_asymbol_value(symbols[i]); + } else { + dso->text_offset = section->vma - section->filepos; + } + } + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); #ifdef bfd_get_section diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 033051717ba5..f3daa44a8266 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -12,27 +12,36 @@ #include <unistd.h> #include <ftw.h> - #include "cgroup_helpers.h" /* * To avoid relying on the system setup, when setup_cgroup_env is called - * we create a new mount namespace, and cgroup namespace. The cgroup2 - * root is mounted at CGROUP_MOUNT_PATH - * - * Unfortunately, most people don't have cgroupv2 enabled at this point in time. - * It's easier to create our own mount namespace and manage it ourselves. + * we create a new mount namespace, and cgroup namespace. The cgroupv2 + * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't + * have cgroupv2 enabled at this point in time. It's easier to create our + * own mount namespace and manage it ourselves. We assume /mnt exists. * - * We assume /mnt exists. + * Related cgroupv1 helpers are named *classid*(), since we only use the + * net_cls controller for tagging net_cls.classid. We assume the default + * mount under /sys/fs/cgroup/net_cls, which should be the case for the + * vast majority of users. */ #define WALK_FD_LIMIT 16 + #define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" +#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" + #define format_cgroup_path(buf, path) \ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ CGROUP_WORK_DIR, path) +#define format_classid_path(buf) \ + snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR) + /** * enable_all_controllers() - Enable all available cgroup v2 controllers * @@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr, return 0; } - -static int join_cgroup_from_top(char *cgroup_path) +static int join_cgroup_from_top(const char *cgroup_path) { char cgroup_procs_path[PATH_MAX + 1]; pid_t pid = getpid(); @@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) { } return cg_fd; } + +/** + * setup_classid_environment() - Setup the cgroupv1 net_cls environment + * + * After calling this function, cleanup_classid_environment should be called + * once testing is complete. + * + * This function will print an error to stderr and return 1 if it is unable + * to setup the cgroup environment. If setup is successful, 0 is returned. + */ +int setup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + + if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && + errno != EBUSY) { + log_err("mount cgroup base"); + return 1; + } + + if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup net_cls"); + return 1; + } + + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && + errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + cleanup_classid_environment(); + + if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { + log_err("mkdir cgroup work dir"); + return 1; + } + + return 0; +} + +/** + * set_classid() - Set a cgroupv1 net_cls classid + * @id: the numeric classid + * + * Writes the passed classid into the cgroup work dir's net_cls.classid + * file in order to later on trigger socket tagging. + * + * On success, it returns 0, otherwise on failure it returns 1. If there + * is a failure, it prints the error to stderr. + */ +int set_classid(unsigned int id) +{ + char cgroup_workdir[PATH_MAX - 42]; + char cgroup_classid_path[PATH_MAX + 1]; + int fd, rc = 0; + + format_classid_path(cgroup_workdir); + snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), + "%s/net_cls.classid", cgroup_workdir); + + fd = open(cgroup_classid_path, O_WRONLY); + if (fd < 0) { + log_err("Opening cgroup classid: %s", cgroup_classid_path); + return 1; + } + + if (dprintf(fd, "%u\n", id) < 0) { + log_err("Setting cgroup classid"); + rc = 1; + } + + close(fd); + return rc; +} + +/** + * join_classid() - Join a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * join it here. This causes the process sockets to be tagged with the given + * net_cls classid. + * + * On success, it returns 0, otherwise on failure it returns 1. + */ +int join_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return join_cgroup_from_top(cgroup_workdir); +} + +/** + * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment + * + * At call time, it moves the calling process to the root cgroup, and then + * runs the deletion process. + * + * On failure, it will print an error to stderr, and try to continue. + */ +void cleanup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + join_cgroup_from_top(NETCLS_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5fe3d88e4f0d..629da3854b3e 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_HELPERS_H #define __CGROUP_HELPERS_H + #include <errno.h> #include <string.h> @@ -8,12 +9,21 @@ #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) - +/* cgroupv2 related */ int cgroup_setup_and_join(const char *path); int create_and_get_cgroup(const char *path); +unsigned long long get_cgroup_id(const char *path); + int join_cgroup(const char *path); + int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); -unsigned long long get_cgroup_id(const char *path); -#endif +/* cgroupv1 related */ +int set_classid(unsigned int id); +int join_classid(void); + +int setup_classid_environment(void); +void cleanup_classid_environment(void); + +#endif /* __CGROUP_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 7e9f6375757a..6db1af8fdee7 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -208,11 +208,26 @@ error_close: static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, - socklen_t addrlen) + socklen_t addrlen, const bool must_fail) { - if (connect(fd, (const struct sockaddr *)addr, addrlen)) { - log_err("Failed to connect to server"); - return -1; + int ret; + + errno = 0; + ret = connect(fd, (const struct sockaddr *)addr, addrlen); + if (must_fail) { + if (!ret) { + log_err("Unexpected success to connect to server"); + return -1; + } + if (errno != EPERM) { + log_err("Unexpected error from connect to server"); + return -1; + } + } else { + if (ret) { + log_err("Failed to connect to server"); + return -1; + } } return 0; @@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) strlen(opts->cc) + 1)) goto error_close; - if (connect_fd_to_addr(fd, &addr, addrlen)) + if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) goto error_close; return fd; @@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) return -1; } - if (connect_fd_to_addr(client_fd, &addr, len)) + if (connect_fd_to_addr(client_fd, &addr, len, false)) return -1; return 0; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index da7e132657d5..d198181a5648 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -20,6 +20,7 @@ typedef __u16 __sum16; struct network_helper_opts { const char *cc; int timeout_ms; + bool must_fail; }; /* ipv4 test vector */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c new file mode 100644 index 000000000000..ab3b9bc5e6d1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "connect4_dropper.skel.h" + +#include "cgroup_helpers.h" +#include "network_helpers.h" + +static int run_test(int cgroup_fd, int server_fd, bool classid) +{ + struct network_helper_opts opts = { + .must_fail = true, + }; + struct connect4_dropper *skel; + int fd, err = 0; + + skel = connect4_dropper__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return -1; + + skel->links.connect_v4_dropper = + bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, + cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) { + err = -1; + goto out; + } + + if (classid && !ASSERT_OK(join_classid(), "join_classid")) { + err = -1; + goto out; + } + + fd = connect_to_fd_opts(server_fd, &opts); + if (fd < 0) + err = -1; + else + close(fd); +out: + connect4_dropper__destroy(skel); + return err; +} + +void test_cgroup_v1v2(void) +{ + struct network_helper_opts opts = {}; + int server_fd, client_fd, cgroup_fd; + static const int port = 60123; + + /* Step 1: Check base connectivity works without any BPF. */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) + return; + client_fd = connect_to_fd_opts(server_fd, &opts); + if (!ASSERT_GE(client_fd, 0, "client_fd")) { + close(server_fd); + return; + } + close(client_fd); + close(server_fd); + + /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */ + cgroup_fd = test__join_cgroup("/connect_dropper"); + if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) + return; + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) { + close(cgroup_fd); + return; + } + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); + setup_classid_environment(); + set_classid(42); + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); + cleanup_classid_environment(); + close(server_fd); + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 53f0e0fa1a53..37c20b5ffa70 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <test_progs.h> -#include <linux/ptrace.h> #include "test_task_pt_regs.skel.h" void test_task_pt_regs(void) diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c new file mode 100644 index 000000000000..b565d997810a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> + +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define VERDICT_REJECT 0 +#define VERDICT_PROCEED 1 + +SEC("cgroup/connect4") +int connect_v4_dropper(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_STREAM) + return VERDICT_PROCEED; + if (ctx->user_port == bpf_htons(60123)) + return VERDICT_REJECT; + return VERDICT_PROCEED; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index 6c059f1cfa1b..e6cb09259408 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/ptrace.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct pt_regs current_regs = {}; -struct pt_regs ctx_regs = {}; +#define PT_REGS_SIZE sizeof(struct pt_regs) + +/* + * The kernel struct pt_regs isn't exported in its entirety to userspace. + * Pass it as an array to task_pt_regs.c + */ +char current_regs[PT_REGS_SIZE] = {}; +char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; SEC("uprobe/trigger_func") @@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx) current = bpf_get_current_task_btf(); regs = (struct pt_regs *) bpf_task_pt_regs(current); - __builtin_memcpy(¤t_regs, regs, sizeof(*regs)); - __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx)); + if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs)) + return 0; + if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx)) + return 0; /* Prove that uprobe was run */ uprobe_res = 1; diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index e1bf55dabdf6..162c41e9bcae 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -746,7 +746,7 @@ int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_ const __u8 *rsp, __u32 rsp_len) { char buf[256]; - unsigned int len; + int len; send(nfc_sock, &cmd[3], cmd_len - 3, 0); len = read(virtual_fd, buf, cmd_len); diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh index 4254ddc3f70b..1ef9e4159bba 100755 --- a/tools/testing/selftests/net/altnames.sh +++ b/tools/testing/selftests/net/altnames.sh @@ -45,7 +45,7 @@ altnames_test() check_err $? "Got unexpected long alternative name from link show JSON" ip link property del $DUMMY_DEV altname $SHORT_NAME - check_err $? "Failed to add short alternative name" + check_err $? "Failed to delete short alternative name" ip -j -p link show $SHORT_NAME &>/dev/null check_fail $? "Unexpected success while trying to do link show with deleted short alternative name" diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S index bd1ca25febe4..aed632d29fff 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S +++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include <ppc-asm.h> +#include <basic_asm.h> #include <asm/unistd.h> .text @@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended) 1: li r3, -1 blr + + +.macro scv level + .long (0x44000001 | (\level) << 5) +.endm + +FUNC_START(getppid_scv_tm_active) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + scv 0 + tend. + POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr + +FUNC_START(getppid_scv_tm_suspended) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + scv 0 + tresume. + tend. + POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 467a6b3134b2..b763354c2eb4 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -19,23 +19,36 @@ #include "utils.h" #include "tm.h" +#ifndef PPC_FEATURE2_SCV +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ +#endif + extern int getppid_tm_active(void); extern int getppid_tm_suspended(void); +extern int getppid_scv_tm_active(void); +extern int getppid_scv_tm_suspended(void); unsigned retries = 0; #define TEST_DURATION 10 /* seconds */ -pid_t getppid_tm(bool suspend) +pid_t getppid_tm(bool scv, bool suspend) { int i; pid_t pid; for (i = 0; i < TM_RETRIES; i++) { - if (suspend) - pid = getppid_tm_suspended(); - else - pid = getppid_tm_active(); + if (suspend) { + if (scv) + pid = getppid_scv_tm_suspended(); + else + pid = getppid_tm_suspended(); + } else { + if (scv) + pid = getppid_scv_tm_active(); + else + pid = getppid_tm_active(); + } if (pid >= 0) return pid; @@ -82,15 +95,24 @@ int tm_syscall(void) * Test a syscall within a suspended transaction and verify * that it succeeds. */ - FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */ /* * Test a syscall within an active transaction and verify that * it fails with the correct failure code. */ - FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */ FAIL_IF(!failure_is_persistent()); /* ...persistently... */ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + + /* Now do it all again with scv if it is available. */ + if (have_hwcap2(PPC_FEATURE2_SCV)) { + FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + } + gettimeofday(&now, 0); } diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 67766bfe176f..2a3638c0a008 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -282,6 +282,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define MESSAGES_CNT 7 +#define MSG_EOR_IDX (MESSAGES_CNT / 2) static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { int fd; @@ -294,7 +295,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Send several messages, one with MSG_EOR flag */ for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, 0); + send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); control_writeln("SENDDONE"); close(fd); @@ -324,6 +325,11 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) perror("message bound violated"); exit(EXIT_FAILURE); } + + if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) { + perror("MSG_EOR"); + exit(EXIT_FAILURE); + } } close(fd); diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index 9db867df7679..f9c52b7fab7b 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -10,10 +10,9 @@ override CFLAGS+= $(call cc-option,-O3,-O1) ${WARNFLAGS} # Add "-fstack-protector" only if toolchain supports it. override CFLAGS+= $(call cc-option,-fstack-protector-strong) CC?= $(CROSS_COMPILE)gcc -PKG_CONFIG?= pkg-config +PKG_CONFIG?= $(CROSS_COMPILE)pkg-config override CFLAGS+=-D VERSION=\"$(VERSION)\" -LDFLAGS+= TARGET=tmon INSTALL_PROGRAM=install -m 755 -p @@ -33,7 +32,6 @@ override CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> / $(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null) OBJS = tmon.o tui.o sysfs.o pid.o -OBJS += tmon: $(OBJS) Makefile tmon.h $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(TARGET) $(TMON_LIBS) @@ -42,15 +40,13 @@ valgrind: tmon sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET) 1> /dev/null install: - - mkdir -p $(INSTALL_ROOT)/$(BINDIR) - - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" + - $(INSTALL_PROGRAM) -D "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" uninstall: $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" clean: - find . -name "*.o" | xargs $(DEL_FILE) - rm -f $(TARGET) + rm -f $(TARGET) $(OBJS) dist: git tag v$(VERSION) |